[youtube] Add storyboard formats

author pukkandan <redacted>

Tue, 16 Nov 2021 19:56:23 +0000 (01:26 +0530)

committer pukkandan <redacted>

Tue, 16 Nov 2021 19:59:34 +0000 (01:29 +0530)
author pukkandan <redacted>
Tue, 16 Nov 2021 19:56:23 +0000 (01:26 +0530)
committer pukkandan <redacted>
Tue, 16 Nov 2021 19:59:34 +0000 (01:29 +0530)
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index 203f4a92adecc0577bf148a54297af496981abdd..41e7fce101699b09f2a7fb6b2f2ed21a85cee5bf 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -9,6 +9,7 @@
  import hashlib
  import itertools
  import json
+import math
  import os.path
  import random
  import re
@@ -28,6 +29,7 @@
  )
  from ..jsinterp import JSInterpreter
  from ..utils import (
+    bug_reports_message,
      bytes_to_intlist,
      clean_html,
      datetime_from_str,
@@ -66,6 +68,10 @@
  )
  
  
+def get_first(obj, keys, **kwargs):
+    """Shorthand for traverse_obj() over the path `(..., *keys)` with get_all=False.
+
+    `keys` is passed through variadic() before being unpacked into the path;
+    callers in this file supply either a single key or a tuple of keys.
+    Any extra keyword arguments are forwarded to traverse_obj() unchanged.
+    NOTE(review): presumably this returns only the first match found across
+    the elements of `obj` - confirm against traverse_obj()'s documentation.
+    """
+    path = (..., *variadic(keys))
+    return traverse_obj(obj, path, get_all=False, **kwargs)
+
+
  # any clients starting with _ cannot be explicitly requested by the user
  INNERTUBE_CLIENTS = {
      'web': {
@@ -2586,6 +2592,41 @@ def process_manifest_format(f, proto, itag):
                              r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                          yield f
  
+    def _extract_storyboard(self, player_responses, duration):
+        """Yield one 'mhtml' storyboard format per entry of the storyboard spec.
+
+        The spec is the '|'-separated string looked up under
+        storyboards -> playerStoryboardSpecRenderer -> spec. Its first field is
+        a URL template containing the placeholders $L, $N and $M; every later
+        field must hold eight '#'-separated values describing one storyboard.
+        Entries that do not match that shape are reported with a warning and
+        skipped.
+
+        player_responses: object searched for the spec via get_first()
+        duration:         total duration that is split between the fragments
+                          of each storyboard; nothing is yielded if it is None
+        """
+        spec = get_first(
+            player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
+        # str.split() never returns an empty list (''.split('|') == ['']), so a
+        # missing spec shows up as an empty base URL rather than an empty list
+        base_url = spec.pop()
+        if not base_url:
+            return
+        # Fragment durations are derived from the total duration; without one
+        # the division below would raise TypeError and abort the caller
+        if duration is None:
+            return
+        # The list was reversed above, so `last_index - i` recovers each
+        # entry's position in the original spec for the $L placeholder
+        last_index = len(spec) - 1
+        for i, args in enumerate(spec):
+            args = args.split('#')
+            counts = list(map(int_or_none, args[:5]))
+            if len(args) != 8 or not all(counts):
+                self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
+                continue
+            width, height, frame_count, cols, rows = counts
+            # The last two fields fill the $N placeholder and the `sigh` query parameter
+            N, sigh = args[6:]
+
+            url = base_url.replace('$L', str(last_index - i)).replace('$N', N) + f'&sigh={sigh}'
+            # Presumably every fragment is a sheet of up to cols * rows frames;
+            # the count may be fractional when the last sheet is only partly filled
+            fragment_count = frame_count / (cols * rows)
+            fragment_duration = duration / fragment_count
+            yield {
+                'format_id': f'sb{i}',
+                'format_note': 'storyboard',
+                'ext': 'mhtml',
+                'protocol': 'mhtml',
+                'acodec': 'none',
+                'vcodec': 'none',
+                'url': url,
+                'width': width,
+                'height': height,
+                'fragments': [{
+                    'path': url.replace('$M', str(j)),
+                    # Cap the final fragment at the time remaining so that the
+                    # fragment durations add up to `duration`
+                    'duration': min(fragment_duration, duration - (j * fragment_duration)),
+                } for j in range(math.ceil(fragment_count))],
+            }
+
      def _real_extract(self, url):
          url, smuggled_data = unsmuggle_url(url, {})
          video_id = self._match_id(url)
@@ -2603,8 +2644,6 @@ def _real_extract(self, url):
              self._get_requested_clients(url, smuggled_data),
              video_id, webpage, master_ytcfg)
  
-        get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
-
          playability_statuses = traverse_obj(
              player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
  
@@ -2700,10 +2739,6 @@ def feed_entry(name):
              if reason:
                  self.raise_no_formats(reason, expected=True)
  
-        # Source is given priority since formats that throttle are given lower source_preference
-        # When throttling issue is fully fixed, remove this
-        self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
-
          keywords = get_first(video_details, 'keywords', expected_type=list) or []
          if not keywords and webpage:
              keywords = [
@@ -2791,6 +2826,12 @@ def feed_entry(name):
          if not duration and live_endtime and live_starttime:
              duration = live_endtime - live_starttime
  
+        formats.extend(self._extract_storyboard(player_responses, duration))
+
+        # Source is given priority since formats that throttle are given lower source_preference
+        # When throttling issue is fully fixed, remove this
+        self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
+
          info = {
              'id': video_id,
              'title': self._live_title(video_title) if is_live else video_title,
author	pukkandan <redacted>
	Tue, 16 Nov 2021 19:56:23 +0000 (01:26 +0530)
committer	pukkandan <redacted>
	Tue, 16 Nov 2021 19:59:34 +0000 (01:29 +0530)