]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/YoutubeDL.py
[MainStreaming] Add extractor (#2180)
[yt-dlp.git] / yt_dlp / YoutubeDL.py
index 11dd36ab871d94946fce007b3a3a0def28895df8..463251789fa3a80eff98bade0104043d3468e01a 100644 (file)
@@ -1161,7 +1161,7 @@ def create_key(outer_mobj):
             str_fmt = f'{fmt[:-1]}s'
             if fmt[-1] == 'l':  # list
                 delim = '\n' if '#' in flags else ', '
-                value, fmt = delim.join(variadic(value, allowed_types=(str, bytes))), str_fmt
+                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
             elif fmt[-1] == 'j':  # json
                 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
             elif fmt[-1] == 'q':  # quoted
@@ -2396,6 +2396,9 @@ def sanitize_numeric_fields(info):
             if not get_from_start:
                 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
 
+        # backward compatibility
+        info_dict['fulltitle'] = info_dict['title']
+
         if not formats:
             self.raise_no_formats(info_dict)
 
@@ -2552,39 +2555,51 @@ def is_wellformed(f):
                 continue
             break
 
-        info_dict['requested_downloads'] = formats_to_download
-        best_format = formats_to_download[-1] if formats_to_download else {}
         if not formats_to_download:
             if not self.params.get('ignore_no_formats_error'):
                 raise ExtractorError('Requested format is not available', expected=True,
                                      video_id=info_dict['id'], ie=info_dict['extractor'])
-            else:
-                self.report_warning('Requested format is not available')
-                # Process what we can, even without any available formats.
-                self.process_info(dict(info_dict))
-        elif download:
-            self.to_screen(
-                '[info] %s: Downloading %d format(s): %s' % (
-                    info_dict['id'], len(formats_to_download),
-                    ", ".join([f['format_id'] for f in formats_to_download])))
+            self.report_warning('Requested format is not available')
+            # Process what we can, even without any available formats.
+            formats_to_download = [{}]
+
+        best_format = formats_to_download[-1]
+        if download:
+            if best_format:
+                self.to_screen(
+                    f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
+                    + ', '.join([f['format_id'] for f in formats_to_download]))
+            max_downloads_reached = False
             for i, fmt in enumerate(formats_to_download):
                 formats_to_download[i] = new_info = dict(info_dict)
                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
                 new_info.update(fmt)
                 new_info['__original_infodict'] = info_dict
-                self.process_info(new_info)
+                try:
+                    self.process_info(new_info)
+                except MaxDownloadsReached:
+                    max_downloads_reached = True
                 new_info.pop('__original_infodict')
                 # Remove copied info
                 for key, val in tuple(new_info.items()):
                     if info_dict.get(key) == val:
                         new_info.pop(key)
+                if max_downloads_reached:
+                    break
+
+            write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
+            assert write_archive.issubset({True, False, 'ignore'})
+            if True in write_archive and False not in write_archive:
+                self.record_download_archive(info_dict)
 
+            info_dict['requested_downloads'] = formats_to_download
             for pp in self._pps['after_video']:
                 info_dict = self.run_pp(pp, info_dict)
+            if max_downloads_reached:
+                raise MaxDownloadsReached()
 
         # We update the info dict with the selected best quality format (backwards compatibility)
-        if formats_to_download:
-            info_dict.update(best_format)
+        info_dict.update(best_format)
         return info_dict
 
     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
@@ -2742,18 +2757,11 @@ def process_info(self, info_dict):
         assert info_dict.get('_type', 'video') == 'video'
         original_infodict = info_dict
 
-        max_downloads = self.params.get('max_downloads')
-        if max_downloads is not None:
-            if self._num_downloads >= int(max_downloads):
-                raise MaxDownloadsReached()
-
-        # TODO: backward compatibility, to be removed
-        info_dict['fulltitle'] = info_dict['title']
-
         if 'format' not in info_dict and 'ext' in info_dict:
             info_dict['format'] = info_dict['ext']
 
         if self._match_entry(info_dict) is not None:
+            info_dict['__write_download_archive'] = 'ignore'
             return
 
         self.post_extract(info_dict)
@@ -2768,9 +2776,7 @@ def process_info(self, info_dict):
         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
 
         if self.params.get('simulate'):
-            if self.params.get('force_write_download_archive', False):
-                self.record_download_archive(info_dict)
-            # Do nothing else if in simulate mode
+            info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
             return
 
         if full_filename is None:
@@ -2879,12 +2885,12 @@ def replace_info_dict(new_info):
             self.report_error('Preprocessing: %s' % str(err))
             return
 
-        must_record_download_archive = False
-        if self.params.get('skip_download', False):
+        if self.params.get('skip_download'):
             info_dict['filepath'] = temp_filename
             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
             info_dict['__files_to_move'] = files_to_move
             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
+            info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
         else:
             # Download
             info_dict.setdefault('__postprocessors', [])
@@ -3113,11 +3119,14 @@ def ffmpeg_fixup(cndn, msg, cls):
                 except Exception as err:
                     self.report_error('post hooks: %s' % str(err))
                     return
-                must_record_download_archive = True
+                info_dict['__write_download_archive'] = True
 
-        if must_record_download_archive or self.params.get('force_write_download_archive', False):
-            self.record_download_archive(info_dict)
+        if self.params.get('force_write_download_archive'):
+            info_dict['__write_download_archive'] = True
+
+        # Make sure the info_dict was modified in-place
         assert info_dict is original_infodict
+
         max_downloads = self.params.get('max_downloads')
         if max_downloads is not None and self._num_downloads >= int(max_downloads):
             raise MaxDownloadsReached()
@@ -3317,6 +3326,7 @@ def record_download_archive(self, info_dict):
             return
         vid_id = self._make_archive_id(info_dict)
         assert vid_id
+        self.write_debug(f'Adding to archive: {vid_id}')
         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
             archive_file.write(vid_id + '\n')
         self.archive.add(vid_id)