[extractor/generic] Separate embed extraction into own function (#5176)
[yt-dlp.git] / yt_dlp / YoutubeDL.py
index 3b6281066b4cff808633e1462051ecacbe3f43ec..e1c24b89255d65cc6c9eb45095f7ebebd8ad99e9 100644 (file)
@@ -846,7 +846,7 @@ def to_stdout(self, message, skip_eol=False, quiet=None):
                                      'Use "YoutubeDL.to_screen" instead')
         self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
 
-    def to_screen(self, message, skip_eol=False, quiet=None):
+    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
         """Print message to screen if not in quiet mode"""
         if self.params.get('logger'):
             self.params['logger'].debug(message)
@@ -855,7 +855,7 @@ def to_screen(self, message, skip_eol=False, quiet=None):
             return
         self._write_string(
             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
-            self._out_files.screen)
+            self._out_files.screen, only_once=only_once)
 
     def to_stderr(self, message, only_once=False):
         """Print message to stderr"""
@@ -1687,8 +1687,8 @@ def process_ie_result(self, ie_result, download=True, extra_info=None):
         elif result_type in ('playlist', 'multi_video'):
             # Protect from infinite recursion due to recursively nested playlists
             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
-            webpage_url = ie_result['webpage_url']
-            if webpage_url in self._playlist_urls:
+            webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
+            if webpage_url and webpage_url in self._playlist_urls:
                 self.to_screen(
                     '[download] Skipping already downloaded playlist: %s'
                     % ie_result.get('title') or ie_result.get('id'))
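
Because playlist results may now lack a `webpage_url` entirely, the recursion guard above only consults `self._playlist_urls` when a URL is actually present. A tiny sketch of that guard, assuming a set of playlist URLs currently being processed:

# Sketch: only treat a playlist as "already seen" if it has a URL that is in flight.
in_flight_playlists = set()

def already_processing(result):
    url = result.get('webpage_url')  # may legitimately be missing
    return bool(url) and url in in_flight_playlists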
@@ -1742,14 +1742,17 @@ def _playlist_infodict(ie_result, strict=False, **kwargs):
         }
         if strict:
             return info
+        if ie_result.get('webpage_url'):
+            info.update({
+                'webpage_url': ie_result['webpage_url'],
+                'webpage_url_basename': url_basename(ie_result['webpage_url']),
+                'webpage_url_domain': get_domain(ie_result['webpage_url']),
+            })
         return {
             **info,
             'playlist_index': 0,
             '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
             'extractor': ie_result['extractor'],
-            'webpage_url': ie_result['webpage_url'],
-            'webpage_url_basename': url_basename(ie_result['webpage_url']),
-            'webpage_url_domain': get_domain(ie_result['webpage_url']),
             'extractor_key': ie_result['extractor_key'],
         }
 
@@ -2423,6 +2426,8 @@ def _fill_common_fields(self, info_dict, is_video=True):
             for key in live_keys:
                 if info_dict.get(key) is None:
                     info_dict[key] = (live_status == key)
+        if live_status == 'post_live':
+            info_dict['was_live'] = True
 
         # Auto generate title fields corresponding to the *_number fields when missing
         # in order to always have clean titles. This is very common for TV series.
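
The added lines make sure a stream in the `post_live` state (ended, but the site is still processing it) is reported as `was_live` even though `live_status` is not literally `'was_live'`. A condensed sketch of how the boolean flags fall out, assuming the same key names as the loop above:

# Sketch: derive the boolean live flags from a single live_status value.
def live_flags(live_status):
    flags = {key: live_status == key for key in ('is_live', 'was_live')}
    if live_status == 'post_live':  # finished stream, not yet fully processed
        flags['was_live'] = True
    return flags

assert live_flags('post_live') == {'is_live': False, 'was_live': True}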
@@ -2520,11 +2525,7 @@ def sanitize_numeric_fields(info):
         info_dict['requested_subtitles'] = self.process_subtitles(
             info_dict['id'], subtitles, automatic_captions)
 
-        if info_dict.get('formats') is None:
-            # There's only one format available
-            formats = [info_dict]
-        else:
-            formats = info_dict['formats']
+        formats = self._get_formats(info_dict)
 
         # or None ensures --clean-infojson removes it
         info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
@@ -2639,7 +2640,7 @@ def is_wellformed(f):
         info_dict, _ = self.pre_process(info_dict, 'after_filter')
 
         # The pre-processors may have modified the formats
-        formats = info_dict.get('formats', [info_dict])
+        formats = self._get_formats(info_dict)
 
         list_only = self.params.get('simulate') is None and (
             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
@@ -2697,31 +2698,29 @@ def is_wellformed(f):
             # Process what we can, even without any available formats.
             formats_to_download = [{}]
 
-        requested_ranges = self.params.get('download_ranges')
-        if requested_ranges:
-            requested_ranges = tuple(requested_ranges(info_dict, self))
-
+        requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
         best_format, downloaded_formats = formats_to_download[-1], []
         if download:
-            if best_format:
+            if best_format and requested_ranges:
                 def to_screen(*msg):
                     self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
 
                 to_screen(f'Downloading {len(formats_to_download)} format(s):',
                           (f['format_id'] for f in formats_to_download))
-                if requested_ranges:
+                if requested_ranges != ({}, ):
                     to_screen(f'Downloading {len(requested_ranges)} time ranges:',
-                              (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
+                              (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
             max_downloads_reached = False
 
-            for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
+            for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
                 new_info = self._copy_infodict(info_dict)
                 new_info.update(fmt)
                 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
+                end_time = offset + min(chapter.get('end_time', duration), duration)
                 if chapter or offset:
                     new_info.update({
                         'section_start': offset + chapter.get('start_time', 0),
-                        'section_end': offset + min(chapter.get('end_time', duration), duration),
+                        'section_end': end_time if end_time < offset + duration else None,
                         'section_title': chapter.get('title'),
                         'section_number': chapter.get('index'),
                     })
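
Two behaviours change in this hunk: a missing `download_ranges` option now acts like a single unrestricted range, so the `itertools.product` loop always runs once per format, and `section_end` is reported as None when the requested end time is not actually shorter than the full video. A hedged sketch of both, with illustrative helper names:

# Sketch only; function names are illustrative, not yt-dlp API.
def requested_ranges(params, info_dict, ydl):
    # With no 'download_ranges' option this returns ({},): one empty range.
    return tuple(params.get('download_ranges', lambda *_: [{}])(info_dict, ydl))

def section_end(offset, duration, chapter):
    # Clamp the requested end to the video duration; if that is not strictly
    # before the end of the video, report "no end" (None) instead.
    end_time = offset + min(chapter.get('end_time', duration), duration)
    return end_time if end_time < offset + duration else None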
@@ -3568,11 +3567,17 @@ def _format_note(self, fdict):
             res += '~' + format_bytes(fdict['filesize_approx'])
         return res
 
-    def render_formats_table(self, info_dict):
-        if not info_dict.get('formats') and not info_dict.get('url'):
-            return None
+    def _get_formats(self, info_dict):
+        if info_dict.get('formats') is None:
+            if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
+                return [info_dict]
+            return []
+        return info_dict['formats']
 
-        formats = info_dict.get('formats', [info_dict])
+    def render_formats_table(self, info_dict):
+        formats = self._get_formats(info_dict)
+        if not formats:
+            return
         if not self.params.get('listformats_table', True) is not False:
             table = [
                 [
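
The new `_get_formats` helper replaces the scattered `info_dict.get('formats', [info_dict])` fallbacks above, with one refinement: an info dict that has no `url` (or is not a plain video) now yields an empty list instead of posing as its own single format. A usage sketch with made-up inputs:

# Illustrative inputs only; the field values are invented.
single = {'id': 'x', 'url': 'https://example.com/v.mp4'}                  # no 'formats' key
multi = {'id': 'y', 'formats': [{'format_id': 'a'}, {'format_id': 'b'}]}
nothing = {'id': 'z', '_type': 'playlist'}                                # nothing downloadable

# _get_formats(single)  -> [single]          (the dict itself is the only format)
# _get_formats(multi)   -> multi['formats']
# _get_formats(nothing) -> []                (the old inline fallback would give [nothing])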
@@ -3639,7 +3644,7 @@ def render_thumbnails_table(self, info_dict):
             return None
         return render_table(
             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
-            [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
+            [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
 
     def render_subtitles_table(self, video_id, subtitles):
         def _row(lang, formats):
@@ -3682,6 +3687,8 @@ def print_debug_header(self):
         if not self.params.get('verbose'):
             return
 
+        from . import _IN_CLI  # Must be delayed import
+
         # These imports can be slow. So import them only as needed
         from .extractor.extractors import _LAZY_LOADER
         from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
@@ -3718,6 +3725,7 @@ def get_encoding(stream):
             __version__,
             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
             '' if source == 'unknown' else f'({source})',
+            '' if _IN_CLI else 'API',
             delim=' '))
         if not _LAZY_LOADER:
             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):