[hls,aes] Fallback to native implementation for AES-CBC
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 9057824bed554643fcc8c5b3df7496ac78ff9ec9..c53c7ec38e16c292f03aebe5e18bc30e686ba59f 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -35,6 +35,7 @@
     compat_kwargs,
     compat_numeric_types,
     compat_os_name,
+    compat_pycrypto_AES,
     compat_shlex_quote,
     compat_str,
     compat_tokenize_tokenize,
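
Note: this compat import is the hook for the commit's AES-CBC fallback; the decryption change itself lands in the fragment downloader rather than in YoutubeDL.py. A minimal sketch of the pattern, assuming the existing pure-Python helpers aes_cbc_decrypt, bytes_to_intlist and intlist_to_bytes (the function name below is illustrative, not from this diff):

from yt_dlp.aes import aes_cbc_decrypt
from yt_dlp.compat import compat_pycrypto_AES
from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes

def cbc_decrypt_with_fallback(data, key, iv):
    # compat_pycrypto_AES is the Crypto/Cryptodome AES module, or None if neither is installed
    if compat_pycrypto_AES:
        return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_CBC, iv).decrypt(data)
    # Otherwise fall back to yt-dlp's native (pure-Python) implementation
    return intlist_to_bytes(aes_cbc_decrypt(
        bytes_to_intlist(data), bytes_to_intlist(key), bytes_to_intlist(iv)))
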
@@ -461,7 +462,7 @@ class YoutubeDL(object):
     ))
 
     params = None
-    _ies = []
+    _ies = {}
     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
     _printed_messages = set()
     _first_webpage_request = True
@@ -475,7 +476,7 @@ def __init__(self, params=None, auto_init=True):
         """Create a FileDownloader object with the given options."""
         if params is None:
             params = {}
-        self._ies = []
+        self._ies = {}
         self._ies_instances = {}
         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
         self._printed_messages = set()
@@ -497,6 +498,12 @@ def __init__(self, params=None, auto_init=True):
             self.report_warning(
                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 
+        if self.params.get('allow_unplayable_formats'):
+            self.report_warning(
+                'You have asked for unplayable formats to be listed/downloaded. '
+                'This is a developer option intended for debugging. '
+                'If you experience any issues while using this option, DO NOT open a bug report')
+
         def check_deprecated(param, option, suggestion):
             if self.params.get(param) is not None:
                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
@@ -514,11 +521,6 @@ def check_deprecated(param, option, suggestion):
         for msg in self.params.get('warnings', []):
             self.report_warning(msg)
 
-        if self.params.get('final_ext'):
-            if self.params.get('merge_output_format'):
-                self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
-            self.params['merge_output_format'] = self.params['final_ext']
-
         if self.params.get('overwrites') is None:
             self.params.pop('overwrites', None)
         elif self.params.get('nooverwrites') is not None:
@@ -630,11 +632,19 @@ def warn_if_short_id(self, argv):
 
     def add_info_extractor(self, ie):
         """Add an InfoExtractor object to the end of the list."""
-        self._ies.append(ie)
+        ie_key = ie.ie_key()
+        self._ies[ie_key] = ie
         if not isinstance(ie, type):
-            self._ies_instances[ie.ie_key()] = ie
+            self._ies_instances[ie_key] = ie
             ie.set_downloader(self)
 
+    def _get_info_extractor_class(self, ie_key):
+        ie = self._ies.get(ie_key)
+        if ie is None:
+            ie = get_info_extractor(ie_key)
+            self.add_info_extractor(ie)
+        return ie
+
     def get_info_extractor(self, ie_key):
         """
         Get an instance of an IE with name ie_key, it will try to get one from
@@ -832,6 +842,16 @@ def report_file_delete(self, file_name):
         except UnicodeEncodeError:
             self.to_screen('Deleting existing file')
 
+    def raise_no_formats(self, info, forced=False):
+        has_drm = info.get('__has_drm')
+        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
+        expected = self.params.get('ignore_no_formats_error')
+        if forced or not expected:
+            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
+                                 expected=has_drm or expected)
+        else:
+            self.report_warning(msg)
+
     def parse_outtmpl(self):
         outtmpl_dict = self.params.get('outtmpl', {})
         if not isinstance(outtmpl_dict, dict):
@@ -888,7 +908,7 @@ def escape_outtmpl(outtmpl):
     def validate_outtmpl(cls, outtmpl):
         ''' @return None or Exception object '''
         outtmpl = re.sub(
-            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]'),
+            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqB]'),
             lambda mobj: f'{mobj.group(0)[:-1]}s',
             cls._outtmpl_expandpath(outtmpl))
         try:
@@ -920,7 +940,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
         }
 
         TMPL_DICT = {}
-        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]'))
+        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqB]'))
         MATH_FUNCTIONS = {
             '+': float.__add__,
             '-': float.__sub__,
@@ -1012,6 +1032,9 @@ def create_key(outer_mobj):
                 value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
             elif fmt[-1] == 'q':
                 value, fmt = compat_shlex_quote(str(value)), str_fmt
+            elif fmt[-1] == 'B':
+                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
+                value, fmt = value.decode('utf-8', 'ignore'), 's'
             elif fmt[-1] == 'c':
                 value = str(value)
                 if value is None:
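
The new 'B' conversion applies the format to the UTF-8 encoding of the value, so a precision such as %(title).100B caps the byte length of the field, and the closing decode(..., 'ignore') drops any multi-byte character the cut left incomplete. A standalone sketch of the same trick (limit_utf8_bytes is an illustrative helper, not part of this diff):

def limit_utf8_bytes(value, max_bytes):
    # Mirrors the 'B' branch above: build a bytes format like b'%.100s',
    # apply it to the UTF-8 encoding, then drop any half-cut character.
    fmt = ('%%.%ds' % max_bytes).encode('utf-8')
    return (fmt % str(value).encode('utf-8')).decode('utf-8', 'ignore')

limit_utf8_bytes('日本語のタイトル', 10)  # -> '日本語' (the partially included 4th character is dropped)
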
@@ -1117,12 +1140,15 @@ def check_filter():
             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                 return 'Skipping "%s" because it is age restricted' % video_title
 
-            if not incomplete:
-                match_filter = self.params.get('match_filter')
-                if match_filter is not None:
-                    ret = match_filter(info_dict)
-                    if ret is not None:
-                        return ret
+            match_filter = self.params.get('match_filter')
+            if match_filter is not None:
+                try:
+                    ret = match_filter(info_dict, incomplete=incomplete)
+                except TypeError:
+                    # For backward compatibility
+                    ret = None if incomplete else match_filter(info_dict)
+                if ret is not None:
+                    return ret
             return None
 
         if self.in_download_archive(info_dict):
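
match_filter is now called with an incomplete keyword, with a TypeError fallback so older single-argument filters keep working. A sketch of a user-supplied filter written against the new convention (the filtering rule itself is made up):

from yt_dlp import YoutubeDL

def skip_long_videos(info_dict, incomplete=False):
    if incomplete:  # metadata is only partial, do not reject yet
        return None
    if (info_dict.get('duration') or 0) > 600:
        return 'Skipping %s: longer than 10 minutes' % info_dict['id']
    return None  # None means no objection, proceed with the download

ydl = YoutubeDL({'match_filter': skip_long_videos})
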
@@ -1144,7 +1170,7 @@ def add_extra_info(info_dict, extra_info):
         for key, value in extra_info.items():
             info_dict.setdefault(key, value)
 
-    def extract_info(self, url, download=True, ie_key=None, extra_info={},
+    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                      process=True, force_generic_extractor=False):
         """
         Return a list with a dictionary for each video extracted.
@@ -1161,39 +1187,36 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
         force_generic_extractor -- force using the generic extractor
         """
 
+        if extra_info is None:
+            extra_info = {}
+
         if not ie_key and force_generic_extractor:
             ie_key = 'Generic'
 
         if ie_key:
-            ies = [self.get_info_extractor(ie_key)]
+            ies = {ie_key: self._get_info_extractor_class(ie_key)}
         else:
             ies = self._ies
 
-        for ie in ies:
+        for ie_key, ie in ies.items():
             if not ie.suitable(url):
                 continue
 
-            ie_key = ie.ie_key()
-            ie = self.get_info_extractor(ie_key)
             if not ie.working():
                 self.report_warning('The program functionality for this site has been marked as broken, '
                                     'and will probably not work.')
 
-            try:
-                temp_id = str_or_none(
-                    ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
-                    else ie._match_id(url))
-            except (AssertionError, IndexError, AttributeError):
-                temp_id = None
+            temp_id = ie.get_temp_id(url)
             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                 self.to_screen("[%s] %s: has already been recorded in archive" % (
                                ie_key, temp_id))
                 break
-            return self.__extract_info(url, ie, download, extra_info, process)
+            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
         else:
             self.report_error('no suitable InfoExtractor for URL %s' % url)
 
-    def __handle_extraction_exceptions(func, handle_all_errors=True):
+    def __handle_extraction_exceptions(func):
+
         def wrapper(self, *args, **kwargs):
             try:
                 return func(self, *args, **kwargs)
@@ -1210,10 +1233,10 @@ def wrapper(self, *args, **kwargs):
                 self.to_stderr('\r')
                 self.report_warning('The download speed is below throttle limit. Re-extracting data')
                 return wrapper(self, *args, **kwargs)
-            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
+            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
                 raise
             except Exception as e:
-                if handle_all_errors and self.params.get('ignoreerrors', False):
+                if self.params.get('ignoreerrors', False):
                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                 else:
                     raise
@@ -1251,7 +1274,7 @@ def add_default_extra_info(self, ie_result, ie, url):
                 'extractor_key': ie.ie_key(),
             })
 
-    def process_ie_result(self, ie_result, download=True, extra_info={}):
+    def process_ie_result(self, ie_result, download=True, extra_info=None):
         """
         Take the result of the ie(may be modified) and resolve all unresolved
         references (URLs, playlist items).
@@ -1259,6 +1282,8 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
         It will also download the videos if 'download'.
         Returns the resolved ie_result.
         """
+        if extra_info is None:
+            extra_info = {}
         result_type = ie_result.get('_type', 'video')
 
         if result_type in ('url', 'url_transparent'):
@@ -1270,10 +1295,14 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                     or extract_flat is True):
                 info_copy = ie_result.copy()
-                self.add_extra_info(info_copy, extra_info)
                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
+                if not ie_result.get('id'):
+                    info_copy['id'] = ie.get_temp_id(ie_result['url'])
                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
+                self.add_extra_info(info_copy, extra_info)
                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
+                if self.params.get('force_write_download_archive', False):
+                    self.record_download_archive(info_copy)
                 return ie_result
 
         if result_type == 'video':
@@ -1416,14 +1445,18 @@ def iter_playlistitems(format):
         msg = (
             'Downloading %d videos' if not isinstance(ie_entries, list)
             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
-        if not isinstance(ie_entries, (list, PagedList)):
-            ie_entries = LazyList(ie_entries)
 
-        def get_entry(i):
-            return YoutubeDL.__handle_extraction_exceptions(
-                lambda self, i: ie_entries[i - 1],
-                False
-            )(self, i)
+        if isinstance(ie_entries, list):
+            def get_entry(i):
+                return ie_entries[i - 1]
+        else:
+            if not isinstance(ie_entries, PagedList):
+                ie_entries = LazyList(ie_entries)
+
+            def get_entry(i):
+                return YoutubeDL.__handle_extraction_exceptions(
+                    lambda self, i: ie_entries[i - 1]
+                )(self, i)
 
         entries = []
         for i in playlistitems or itertools.count(playliststart):
@@ -1449,7 +1482,7 @@ def get_entry(i):
 
         # Save playlist_index before re-ordering
         entries = [
-            ((playlistitems[i - 1] if playlistitems else i), entry)
+            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
             for i, entry in enumerate(entries, 1)
             if entry is not None]
         n_entries = len(entries)
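
The comprehension above now offsets the saved playlist_index by playliststart when --playlist-items is not given. A quick worked example of the arithmetic:

# --playlist-start 5, no --playlist-items: entries enumerate as i = 1, 2, 3,
# but their real positions in the playlist are 5, 6, 7
playliststart, playlistitems = 5, None
entries = ['a', 'b', 'c']
[(playlistitems[i - 1] if playlistitems else i + playliststart - 1)
 for i, _ in enumerate(entries, 1)]  # -> [5, 6, 7]
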
@@ -1514,8 +1547,8 @@ def get_entry(i):
         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
         for i, entry_tuple in enumerate(entries, 1):
             playlist_index, entry = entry_tuple
-            if 'playlist_index' in self.params.get('compat_options', []):
-                playlist_index = playlistitems[i - 1] if playlistitems else i
+            if 'playlist-index' in self.params.get('compat_opts', []):
+                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
             # This __x_forwarded_for_ip thing is a bit ugly but requires
             # minimal changes
@@ -2050,7 +2083,8 @@ def process_video_result(self, info_dict, download=True):
         if 'id' not in info_dict:
             raise ExtractorError('Missing "id" field in extractor result')
         if 'title' not in info_dict:
-            raise ExtractorError('Missing "title" field in extractor result')
+            raise ExtractorError('Missing "title" field in extractor result',
+                                 video_id=info_dict['id'], ie=info_dict['extractor'])
 
         def report_force_conversion(field, field_not, conversion):
             self.report_warning(
@@ -2151,11 +2185,12 @@ def sanitize_numeric_fields(info):
         else:
             formats = info_dict['formats']
 
+        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
+        if not self.params.get('allow_unplayable_formats'):
+            formats = [f for f in formats if not f.get('has_drm')]
+
         if not formats:
-            if not self.params.get('ignore_no_formats_error'):
-                raise ExtractorError('No video formats found!')
-            else:
-                self.report_warning('No video formats found!')
+            self.raise_no_formats(info_dict)
 
         def is_wellformed(f):
             url = f.get('url')
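
A small illustration of the two lines added above: __has_drm records whether any extracted format was DRM-protected, and, unless allow_unplayable_formats is set, the protected formats are dropped before format selection (the format dicts here are made up):

formats = [
    {'format_id': 'hls-1080', 'has_drm': True},
    {'format_id': 'http-720', 'has_drm': False},
]
has_drm = any(f.get('has_drm') for f in formats)         # True -> stored as info_dict['__has_drm']
playable = [f for f in formats if not f.get('has_drm')]  # only 'http-720' survives
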
@@ -2219,7 +2254,7 @@ def is_wellformed(f):
 
         # TODO Central sorting goes here
 
-        if formats and formats[0] is not info_dict:
+        if not formats or formats[0] is not info_dict:
             # only set the 'formats' fields if the original info_dict list them
             # otherwise we end up with a circular reference, the first (and unique)
             # element in the 'formats' field in info_dict is info_dict itself,
@@ -2231,9 +2266,10 @@ def is_wellformed(f):
         if self.params.get('list_thumbnails'):
             self.list_thumbnails(info_dict)
         if self.params.get('listformats'):
-            if not info_dict.get('formats'):
-                raise ExtractorError('No video formats found', expected=True)
-            self.list_formats(info_dict)
+            if not info_dict.get('formats') and not info_dict.get('url'):
+                self.to_screen('%s has no formats' % info_dict['id'])
+            else:
+                self.list_formats(info_dict)
         if self.params.get('listsubtitles'):
             if 'automatic_captions' in info_dict:
                 self.list_subtitles(
@@ -2281,7 +2317,8 @@ def is_wellformed(f):
         formats_to_download = list(format_selector(ctx))
         if not formats_to_download:
             if not self.params.get('ignore_no_formats_error'):
-                raise ExtractorError('Requested format is not available', expected=True)
+                raise ExtractorError('Requested format is not available', expected=True,
+                                     video_id=info_dict['id'], ie=info_dict['extractor'])
             else:
                 self.report_warning('Requested format is not available')
                 # Process what we can, even without any available formats.
@@ -2410,6 +2447,8 @@ def print_optional(field):
             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
 
     def dl(self, name, info, subtitle=False, test=False):
+        if not info.get('url'):
+            self.raise_no_formats(info, True)
 
         if test:
             verbose = self.params.get('verbose')
@@ -2715,7 +2754,7 @@ def correct_ext(filename, ext=new_ext):
                     _protocols = set(determine_protocol(f) for f in requested_formats)
                     if len(_protocols) == 1:  # All requested formats have same protocol
                         info_dict['protocol'] = _protocols.pop()
-                    directly_mergable = FFmpegFD.can_merge_formats(info_dict)
+                    directly_mergable = FFmpegFD.can_merge_formats(info_dict, self.params)
                     if dl_filename is not None:
                         self.report_file_already_downloaded(dl_filename)
                     elif (directly_mergable and get_suitable_downloader(
@@ -2873,13 +2912,13 @@ def download(self, url_list):
             except UnavailableVideoError:
                 self.report_error('unable to download video')
             except MaxDownloadsReached:
-                self.to_screen('[info] Maximum number of downloaded files reached')
+                self.to_screen('[info] Maximum number of downloads reached')
                 raise
             except ExistingVideoReached:
-                self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
+                self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
                 raise
             except RejectedVideoReached:
-                self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
+                self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
                 raise
             else:
                 if self.params.get('dump_single_json', False):
@@ -2908,6 +2947,8 @@ def download_with_info_file(self, info_filename):
     @staticmethod
     def sanitize_info(info_dict, remove_private_keys=False):
         ''' Sanitize the infodict for converting to json '''
+        if info_dict is None:
+            return info_dict
         info_dict.setdefault('epoch', int(time.time()))
         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
@@ -3006,9 +3047,9 @@ def _make_archive_id(self, info_dict):
             if not url:
                 return
             # Try to find matching extractor for the URL and take its ie_key
-            for ie in self._ies:
+            for ie_key, ie in self._ies.items():
                 if ie.suitable(url):
-                    extractor = ie.ie_key()
+                    extractor = ie_key
                     break
             else:
                 return
@@ -3255,13 +3296,12 @@ def python_implementation():
         ) or 'none'
         self._write_string('[debug] exe versions: %s\n' % exe_str)
 
-        from .downloader.fragment import can_decrypt_frag
         from .downloader.websocket import has_websockets
         from .postprocessor.embedthumbnail import has_mutagen
         from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
 
         lib_str = ', '.join(sorted(filter(None, (
-            can_decrypt_frag and 'pycryptodome',
+            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
             has_websockets and 'websockets',
             has_mutagen and 'mutagen',
             SQLITE_AVAILABLE and 'sqlite',
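
The debug library listing (lib_str) now names whichever AES backend compat_pycrypto_AES resolved to, using the top-level package of its module name (assuming, as the compat shim's name suggests, that it is the Cryptodome or Crypto AES module):

compat_pycrypto_AES.__name__                 # e.g. 'Cryptodome.Cipher.AES' or 'Crypto.Cipher.AES'
compat_pycrypto_AES.__name__.split('.')[0]   # -> 'Cryptodome' (pycryptodomex) or 'Crypto' (pycryptodome)
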