]> jfr.im git - yt-dlp.git/commitdiff
[youtube] Add storyboard formats
author pukkandan <redacted>
Tue, 16 Nov 2021 19:56:23 +0000 (01:26 +0530)
committer pukkandan <redacted>
Tue, 16 Nov 2021 19:59:34 +0000 (01:29 +0530)
Closes: #1553, https://github.com/ytdl-org/youtube-dl/issues/9868
Related: https://github.com/ytdl-org/youtube-dl/pull/14951

yt_dlp/extractor/youtube.py

index 203f4a92adecc0577bf148a54297af496981abdd..41e7fce101699b09f2a7fb6b2f2ed21a85cee5bf 100644 (file)
@@ -9,6 +9,7 @@
 import hashlib
 import itertools
 import json
+import math
 import os.path
 import random
 import re
@@ -28,6 +29,7 @@
 )
 from ..jsinterp import JSInterpreter
 from ..utils import (
+    bug_reports_message,
     bytes_to_intlist,
     clean_html,
     datetime_from_str,
 )
 
 
+def get_first(obj, keys, **kwargs):  # get_all=False lookup of `keys` under a leading `...`; presumably the first hit
+    return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)  # get_all in kwargs -> TypeError
+
+
 # any clients starting with _ cannot be explicity requested by the user
 INNERTUBE_CLIENTS = {
     'web': {
@@ -2586,6 +2592,41 @@ def process_manifest_format(f, proto, itag):
                             r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                         yield f
 
+    def _extract_storyboard(self, player_responses, duration):
+        """Yield one mhtml format dict per storyboard level; nothing if the spec or the duration is missing."""
+        spec = get_first(
+            player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
+        base_url = spec.pop()  # Spec is 'base_url|level|...'; split() is never empty, so test the URL instead
+        if not base_url or duration is None:  # Without a duration the division below would raise TypeError
+            return
+        L = len(spec) - 1  # The levels were reversed above, so loop index i is level L - i in the spec's own order
+        for i, args in enumerate(spec):
+            args = args.split('#')
+            counts = list(map(int_or_none, args[:5]))
+            if len(args) != 8 or not all(counts):
+                self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
+                continue
+            width, height, frame_count, cols, rows = counts
+            N, sigh = args[6:]
+            url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
+            fragment_count = frame_count / (cols * rows)  # Float; math.ceil below rounds a partial last fragment up
+            fragment_duration = duration / fragment_count
+            yield {
+                'format_id': f'sb{i}',
+                'format_note': 'storyboard',
+                'ext': 'mhtml',
+                'protocol': 'mhtml',
+                'acodec': 'none',
+                'vcodec': 'none',
+                'url': url,
+                'width': width,
+                'height': height,
+                'fragments': [{
+                    'path': url.replace('$M', str(j)),
+                    'duration': min(fragment_duration, duration - (j * fragment_duration)),  # Cap at time remaining
+                } for j in range(math.ceil(fragment_count))],
+            }
+
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
         video_id = self._match_id(url)
@@ -2603,8 +2644,6 @@ def _real_extract(self, url):
             self._get_requested_clients(url, smuggled_data),
             video_id, webpage, master_ytcfg)
 
-        get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
-
         playability_statuses = traverse_obj(
             player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
 
@@ -2700,10 +2739,6 @@ def feed_entry(name):
             if reason:
                 self.raise_no_formats(reason, expected=True)
 
-        # Source is given priority since formats that throttle are given lower source_preference
-        # When throttling issue is fully fixed, remove this
-        self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
-
         keywords = get_first(video_details, 'keywords', expected_type=list) or []
         if not keywords and webpage:
             keywords = [
@@ -2791,6 +2826,12 @@ def feed_entry(name):
         if not duration and live_endtime and live_starttime:
             duration = live_endtime - live_starttime
 
+        formats.extend(self._extract_storyboard(player_responses, duration))
+
+        # Source is given priority since formats that throttle are given lower source_preference
+        # When throttling issue is fully fixed, remove this
+        self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
+
         info = {
             'id': video_id,
             'title': self._live_title(video_title) if is_live else video_title,