[hls,aes] Fallback to native implementation for AES-CBC
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 9057824bed554643fcc8c5b3df7496ac78ff9ec9..c53c7ec38e16c292f03aebe5e18bc30e686ba59f 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -35,6 +35,7 @@
     compat_kwargs,
     compat_numeric_types,
     compat_os_name,
+    compat_pycrypto_AES,
     compat_shlex_quote,
     compat_str,
     compat_tokenize_tokenize,
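
Note: this compat import is the hook for the commit's AES-CBC fallback; the decryption change itself lands in the fragment downloader rather than in YoutubeDL.py. A minimal sketch of the pattern, assuming the existing pure-Python helpers aes_cbc_decrypt, bytes_to_intlist and intlist_to_bytes (the function name below is illustrative, not from this diff):

from yt_dlp.aes import aes_cbc_decrypt
from yt_dlp.compat import compat_pycrypto_AES
from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes

def cbc_decrypt_with_fallback(data, key, iv):
    # compat_pycrypto_AES is the Crypto/Cryptodome AES module, or None if neither is installed
    if compat_pycrypto_AES:
        return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_CBC, iv).decrypt(data)
    # Otherwise fall back to yt-dlp's native (pure-Python) implementation
    return intlist_to_bytes(aes_cbc_decrypt(
        bytes_to_intlist(data), bytes_to_intlist(key), bytes_to_intlist(iv)))
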
@@ -461,7 +462,7 @@ class YoutubeDL(object):
     ))
 
     params = None
-    _ies = []
+    _ies = {}
     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
     _printed_messages = set()
     _first_webpage_request = True
@@ -475,7 +476,7 @@ def __init__(self, params=None, auto_init=True):
         """Create a FileDownloader object with the given options."""
         if params is None:
             params = {}
-        self._ies = []
+        self._ies = {}
         self._ies_instances = {}
         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
         self._printed_messages = set()
@@ -497,6 +498,12 @@ def __init__(self, params=None, auto_init=True):
             self.report_warning(
                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 
+        if self.params.get('allow_unplayable_formats'):
+            self.report_warning(
+                'You have asked for unplayable formats to be listed/downloaded. '
+                'This is a developer option intended for debugging. '
+                'If you experience any issues while using this option, DO NOT open a bug report')
+
         def check_deprecated(param, option, suggestion):
             if self.params.get(param) is not None:
                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
@@ -514,11 +521,6 @@ def check_deprecated(param, option, suggestion):
         for msg in self.params.get('warnings', []):
             self.report_warning(msg)
 
-        if self.params.get('final_ext'):
-            if self.params.get('merge_output_format'):
-                self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
-            self.params['merge_output_format'] = self.params['final_ext']
-
         if self.params.get('overwrites') is None:
             self.params.pop('overwrites', None)
         elif self.params.get('nooverwrites') is not None:
@@ -630,11 +632,19 @@ def warn_if_short_id(self, argv):
 
     def add_info_extractor(self, ie):
         """Add an InfoExtractor object to the end of the list."""
-        self._ies.append(ie)
+        ie_key = ie.ie_key()
+        self._ies[ie_key] = ie
         if not isinstance(ie, type):
-            self._ies_instances[ie.ie_key()] = ie
+            self._ies_instances[ie_key] = ie
             ie.set_downloader(self)
 
+    def _get_info_extractor_class(self, ie_key):
+        ie = self._ies.get(ie_key)
+        if ie is None:
+            ie = get_info_extractor(ie_key)
+            self.add_info_extractor(ie)
+        return ie
+
     def get_info_extractor(self, ie_key):
         """
         Get an instance of an IE with name ie_key, it will try to get one from
@@ -832,6 +842,16 @@ def report_file_delete(self, file_name):
         except UnicodeEncodeError:
             self.to_screen('Deleting existing file')
 
+    def raise_no_formats(self, info, forced=False):
+        has_drm = info.get('__has_drm')
+        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
+        expected = self.params.get('ignore_no_formats_error')
+        if forced or not expected:
+            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
+                                 expected=has_drm or expected)
+        else:
+            self.report_warning(msg)
+
     def parse_outtmpl(self):
         outtmpl_dict = self.params.get('outtmpl', {})
         if not isinstance(outtmpl_dict, dict):
@@ -888,7 +908,7 @@ def escape_outtmpl(outtmpl):
     def validate_outtmpl(cls, outtmpl):
         ''' @return None or Exception object '''
         outtmpl = re.sub(
-            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]'),
+            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqB]'),
             lambda mobj: f'{mobj.group(0)[:-1]}s',
             cls._outtmpl_expandpath(outtmpl))
         try:
@@ -920,7 +940,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
         }
 
         TMPL_DICT = {}
-        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]'))
+        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqB]'))
         MATH_FUNCTIONS = {
             '+': float.__add__,
             '-': float.__sub__,
@@ -1012,6 +1032,9 @@ def create_key(outer_mobj):
                 value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
             elif fmt[-1] == 'q':
                 value, fmt = compat_shlex_quote(str(value)), str_fmt
+            elif fmt[-1] == 'B':
+                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
+                value, fmt = value.decode('utf-8', 'ignore'), 's'
             elif fmt[-1] == 'c':
                 value = str(value)
                 if value is None:
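
The new 'B' conversion applies the format to the UTF-8 encoding of the value, so a precision such as %(title).100B caps the byte length of the field, and the closing decode(..., 'ignore') drops any multi-byte character the cut left incomplete. A standalone sketch of the same trick (limit_utf8_bytes is an illustrative helper, not part of this diff):

def limit_utf8_bytes(value, max_bytes):
    # Mirrors the 'B' branch above: build a bytes format like b'%.100s',
    # apply it to the UTF-8 encoding, then drop any half-cut character.
    fmt = ('%%.%ds' % max_bytes).encode('utf-8')
    return (fmt % str(value).encode('utf-8')).decode('utf-8', 'ignore')

limit_utf8_bytes('日本語のタイトル', 10)  # -> '日本語' (the partially included 4th character is dropped)
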
@@ -1117,12 +1140,15 @@ def check_filter():
             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                 return 'Skipping "%s" because it is age restricted' % video_title
 
-            if not incomplete:
-                match_filter = self.params.get('match_filter')
-                if match_filter is not None:
-                    ret = match_filter(info_dict)
-                    if ret is not None:
-                        return ret
+            match_filter = self.params.get('match_filter')
+            if match_filter is not None:
+                try:
+                    ret = match_filter(info_dict, incomplete=incomplete)
+                except TypeError:
+                    # For backward compatibility
+                    ret = None if incomplete else match_filter(info_dict)
+                if ret is not None:
+                    return ret
             return None
 
         if self.in_download_archive(info_dict):
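
match_filter is now called with an incomplete keyword, with a TypeError fallback so older single-argument filters keep working. A sketch of a user-supplied filter written against the new convention (the filtering rule itself is made up):

from yt_dlp import YoutubeDL

def skip_long_videos(info_dict, incomplete=False):
    if incomplete:  # metadata is only partial, do not reject yet
        return None
    if (info_dict.get('duration') or 0) > 600:
        return 'Skipping %s: longer than 10 minutes' % info_dict['id']
    return None  # None means no objection, proceed with the download

ydl = YoutubeDL({'match_filter': skip_long_videos})
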
@@ -1144,7 +1170,7 @@ def add_extra_info(info_dict, extra_info):
         for key, value in extra_info.items():
             info_dict.setdefault(key, value)
 
-    def extract_info(self, url, download=True, ie_key=None, extra_info={},
+    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                      process=True, force_generic_extractor=False):
         """
         Return a list with a dictionary for each video extracted.
@@ -1161,39 +1187,36 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
         force_generic_extractor -- force using the generic extractor
         """
 
+        if extra_info is None:
+            extra_info = {}
+
         if not ie_key and force_generic_extractor:
             ie_key = 'Generic'
 
         if ie_key:
-            ies = [self.get_info_extractor(ie_key)]
+            ies = {ie_key: self._get_info_extractor_class(ie_key)}
         else:
             ies = self._ies
 
-        for ie in ies:
+        for ie_key, ie in ies.items():
             if not ie.suitable(url):
                 continue
 
-            ie_key = ie.ie_key()
-            ie = self.get_info_extractor(ie_key)
             if not ie.working():
                 self.report_warning('The program functionality for this site has been marked as broken, '
                                     'and will probably not work.')
 
-            try:
-                temp_id = str_or_none(
-                    ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
-                    else ie._match_id(url))
-            except (AssertionError, IndexError, AttributeError):
-                temp_id = None
+            temp_id = ie.get_temp_id(url)
             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                 self.to_screen("[%s] %s: has already been recorded in archive" % (
                                ie_key, temp_id))
                 break
-            return self.__extract_info(url, ie, download, extra_info, process)
+            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
         else:
             self.report_error('no suitable InfoExtractor for URL %s' % url)
 
-    def __handle_extraction_exceptions(func, handle_all_errors=True):
+    def __handle_extraction_exceptions(func):
+
         def wrapper(self, *args, **kwargs):
             try:
                 return func(self, *args, **kwargs)
@@ -1210,10 +1233,10 @@ def wrapper(self, *args, **kwargs):
                 self.to_stderr('\r')
                 self.report_warning('The download speed is below throttle limit. Re-extracting data')
                 return wrapper(self, *args, **kwargs)
-            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
+            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
                 raise
             except Exception as e:
-                if handle_all_errors and self.params.get('ignoreerrors', False):
+                if self.params.get('ignoreerrors', False):
                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                 else:
                     raise
@@ -1251,7 +1274,7 @@ def add_default_extra_info(self, ie_result, ie, url):
                 'extractor_key': ie.ie_key(),
             })
 
-    def process_ie_result(self, ie_result, download=True, extra_info={}):
+    def process_ie_result(self, ie_result, download=True, extra_info=None):
         """
         Take the result of the ie(may be modified) and resolve all unresolved
         references (URLs, playlist items).
@@ -1259,6 +1282,8 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
         It will also download the videos if 'download'.
         Returns the resolved ie_result.
         """
+        if extra_info is None:
+            extra_info = {}
         result_type = ie_result.get('_type', 'video')
 
         if result_type in ('url', 'url_transparent'):
@@ -1270,10 +1295,14 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                     or extract_flat is True):
                 info_copy = ie_result.copy()
-                self.add_extra_info(info_copy, extra_info)
                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
+                if not ie_result.get('id'):
+                    info_copy['id'] = ie.get_temp_id(ie_result['url'])
                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
+                self.add_extra_info(info_copy, extra_info)
                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
+                if self.params.get('force_write_download_archive', False):
+                    self.record_download_archive(info_copy)
                 return ie_result
 
         if result_type == 'video':
@@ -1416,14 +1445,18 @@ def iter_playlistitems(format):
         msg = (
             'Downloading %d videos' if not isinstance(ie_entries, list)
             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
-        if not isinstance(ie_entries, (list, PagedList)):
-            ie_entries = LazyList(ie_entries)
 
-        def get_entry(i):
-            return YoutubeDL.__handle_extraction_exceptions(
-                lambda self, i: ie_entries[i - 1],
-                False
-            )(self, i)
+        if isinstance(ie_entries, list):
+            def get_entry(i):
+                return ie_entries[i - 1]
+        else:
+            if not isinstance(ie_entries, PagedList):
+                ie_entries = LazyList(ie_entries)
+
+            def get_entry(i):
+                return YoutubeDL.__handle_extraction_exceptions(
+                    lambda self, i: ie_entries[i - 1]
+                )(self, i)
 
         entries = []
         for i in playlistitems or itertools.count(playliststart):
@@ -1449,7 +1482,7 @@ def get_entry(i):
 
         # Save playlist_index before re-ordering
         entries = [
-            ((playlistitems[i - 1] if playlistitems else i), entry)
+            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
             for i, entry in enumerate(entries, 1)
             if entry is not None]
         n_entries = len(entries)
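
The comprehension above now offsets the saved playlist_index by playliststart when --playlist-items is not given. A quick worked example of the arithmetic:

# --playlist-start 5, no --playlist-items: entries enumerate as i = 1, 2, 3,
# but their real positions in the playlist are 5, 6, 7
playliststart, playlistitems = 5, None
entries = ['a', 'b', 'c']
[(playlistitems[i - 1] if playlistitems else i + playliststart - 1)
 for i, _ in enumerate(entries, 1)]  # -> [5, 6, 7]
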
@@ -1514,8 +1547,8 @@ def get_entry(i):
         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
         for i, entry_tuple in enumerate(entries, 1):
             playlist_index, entry = entry_tuple
-            if 'playlist_index' in self.params.get('compat_options', []):
-                playlist_index = playlistitems[i - 1] if playlistitems else i
+            if 'playlist-index' in self.params.get('compat_opts', []):
+                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
             # This __x_forwarded_for_ip thing is a bit ugly but requires
             # minimal changes
@@ -2050,7 +2083,8 @@ def process_video_result(self, info_dict, download=True):
         if 'id' not in info_dict:
             raise ExtractorError('Missing "id" field in extractor result')
         if 'title' not in info_dict:
-            raise ExtractorError('Missing "title" field in extractor result')
+            raise ExtractorError('Missing "title" field in extractor result',
+                                 video_id=info_dict['id'], ie=info_dict['extractor'])
 
         def report_force_conversion(field, field_not, conversion):
             self.report_warning(
@@ -2151,11 +2185,12 @@ def sanitize_numeric_fields(info):
         else:
             formats = info_dict['formats']
 
+        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
+        if not self.params.get('allow_unplayable_formats'):
+            formats = [f for f in formats if not f.get('has_drm')]
+
         if not formats:
-            if not self.params.get('ignore_no_formats_error'):
-                raise ExtractorError('No video formats found!')
-            else:
-                self.report_warning('No video formats found!')
+            self.raise_no_formats(info_dict)
 
         def is_wellformed(f):
             url = f.get('url')
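
A small illustration of the two lines added above: __has_drm records whether any extracted format was DRM-protected, and, unless allow_unplayable_formats is set, the protected formats are dropped before format selection (the format dicts here are made up):

formats = [
    {'format_id': 'hls-1080', 'has_drm': True},
    {'format_id': 'http-720', 'has_drm': False},
]
has_drm = any(f.get('has_drm') for f in formats)         # True -> stored as info_dict['__has_drm']
playable = [f for f in formats if not f.get('has_drm')]  # only 'http-720' survives
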
@@ -2219,7 +2254,7 @@ def is_wellformed(f):
 
         # TODO Central sorting goes here
 
-        if formats and formats[0] is not info_dict:
+        if not formats or formats[0] is not info_dict:
             # only set the 'formats' fields if the original info_dict list them
             # otherwise we end up with a circular reference, the first (and unique)
             # element in the 'formats' field in info_dict is info_dict itself,
@@ -2231,9 +2266,10 @@ def is_wellformed(f):
         if self.params.get('list_thumbnails'):
             self.list_thumbnails(info_dict)
         if self.params.get('listformats'):
-            if not info_dict.get('formats'):
-                raise ExtractorError('No video formats found', expected=True)
-            self.list_formats(info_dict)
+            if not info_dict.get('formats') and not info_dict.get('url'):
+                self.to_screen('%s has no formats' % info_dict['id'])
+            else:
+                self.list_formats(info_dict)
         if self.params.get('listsubtitles'):
             if 'automatic_captions' in info_dict:
                 self.list_subtitles(
@@ -2281,7 +2317,8 @@ def is_wellformed(f):
         formats_to_download = list(format_selector(ctx))
         if not formats_to_download:
             if not self.params.get('ignore_no_formats_error'):
-                raise ExtractorError('Requested format is not available', expected=True)
+                raise ExtractorError('Requested format is not available', expected=True,
+                                     video_id=info_dict['id'], ie=info_dict['extractor'])
             else:
                 self.report_warning('Requested format is not available')
                 # Process what we can, even without any available formats.
@@ -2410,6 +2447,8 @@ def print_optional(field):
             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
 
     def dl(self, name, info, subtitle=False, test=False):
+        if not info.get('url'):
+            self.raise_no_formats(info, True)
 
         if test:
             verbose = self.params.get('verbose')
@@ -2715,7 +2754,7 @@ def correct_ext(filename, ext=new_ext):
                     _protocols = set(determine_protocol(f) for f in requested_formats)
                     if len(_protocols) == 1:  # All requested formats have same protocol
                         info_dict['protocol'] = _protocols.pop()
-                    directly_mergable = FFmpegFD.can_merge_formats(info_dict)
+                    directly_mergable = FFmpegFD.can_merge_formats(info_dict, self.params)
                     if dl_filename is not None:
                         self.report_file_already_downloaded(dl_filename)
                     elif (directly_mergable and get_suitable_downloader(
@@ -2873,13 +2912,13 @@ def download(self, url_list):
             except UnavailableVideoError:
                 self.report_error('unable to download video')
             except MaxDownloadsReached:
-                self.to_screen('[info] Maximum number of downloaded files reached')
+                self.to_screen('[info] Maximum number of downloads reached')
                 raise
             except ExistingVideoReached:
-                self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
+                self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
                 raise
             except RejectedVideoReached:
-                self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
+                self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
                 raise
             else:
                 if self.params.get('dump_single_json', False):
@@ -2908,6 +2947,8 @@ def download_with_info_file(self, info_filename):
     @staticmethod
     def sanitize_info(info_dict, remove_private_keys=False):
         ''' Sanitize the infodict for converting to json '''
+        if info_dict is None:
+            return info_dict
         info_dict.setdefault('epoch', int(time.time()))
         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
@@ -3006,9 +3047,9 @@ def _make_archive_id(self, info_dict):
             if not url:
                 return
             # Try to find matching extractor for the URL and take its ie_key
-            for ie in self._ies:
+            for ie_key, ie in self._ies.items():
                 if ie.suitable(url):
-                    extractor = ie.ie_key()
+                    extractor = ie_key
                     break
             else:
                 return
@@ -3255,13 +3296,12 @@ def python_implementation():
         ) or 'none'
         self._write_string('[debug] exe versions: %s\n' % exe_str)
 
-        from .downloader.fragment import can_decrypt_frag
         from .downloader.websocket import has_websockets
         from .postprocessor.embedthumbnail import has_mutagen
         from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
 
         lib_str = ', '.join(sorted(filter(None, (
-            can_decrypt_frag and 'pycryptodome',
+            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
             has_websockets and 'websockets',
             has_mutagen and 'mutagen',
             SQLITE_AVAILABLE and 'sqlite',
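
The debug library listing (lib_str) now names whichever AES backend compat_pycrypto_AES resolved to, using the top-level package of its module name (assuming, as the compat shim's name suggests, that it is the Cryptodome or Crypto AES module):

compat_pycrypto_AES.__name__                 # e.g. 'Cryptodome.Cipher.AES' or 'Crypto.Cipher.AES'
compat_pycrypto_AES.__name__.split('.')[0]   # -> 'Cryptodome' (pycryptodomex) or 'Crypto' (pycryptodome)
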