[extractor] Extract storyboards from SMIL manifests (#1128)

author Felix S <redacted>

Sat, 2 Oct 2021 18:43:42 +0000 (18:43 +0000)

committer GitHub <redacted>

Sat, 2 Oct 2021 18:43:42 +0000 (00:13 +0530)
author Felix S <redacted>
Sat, 2 Oct 2021 18:43:42 +0000 (18:43 +0000)
committer GitHub <redacted>
Sat, 2 Oct 2021 18:43:42 +0000 (00:13 +0530)
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index c42a29ee3f04a4829ce74febf6bf12fa3138dcb6..9c4dd3ec522d09ac36b67c28486da95c6a328392 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -3029,9 +3029,7 @@ def record_download_archive(self, info_dict):
  
      @staticmethod
      def format_resolution(format, default='unknown'):
-        if format.get('vcodec') == 'none':
-            if format.get('acodec') == 'none':
-                return 'images'
+        if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
              return 'audio only'
          if format.get('resolution') is not None:
              return format['resolution']
@@ -3043,6 +3041,8 @@ def format_resolution(format, default='unknown'):
              res = '%dx?' % format['width']
          else:
              res = default
+        if format.get('vcodec') == 'none' and format.get('acodec') == 'none':
+            res += ' (images)'
          return res
  
      def _format_note(self, fdict):
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index 5da29dc63d9e2467045f45145cce6573b351bf8f..f65a098d72e3f48309e254aac873a0081b20b8e7 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -2346,14 +2346,15 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
          rtmp_count = 0
          http_count = 0
          m3u8_count = 0
+        imgs_count = 0
  
-        srcs = []
+        srcs = set()
          media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
          for medium in media:
              src = medium.get('src')
              if not src or src in srcs:
                  continue
-            srcs.append(src)
+            srcs.add(src)
  
              bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
              filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
@@ -2427,6 +2428,24 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
                      'height': height,
                  })
  
+        for medium in smil.findall(self._xpath_ns('.//imagestream', namespace)):
+            src = medium.get('src')
+            if not src or src in srcs:
+                continue
+            srcs.add(src)
+
+            imgs_count += 1
+            formats.append({
+                'format_id': 'imagestream-%d' % (imgs_count),
+                'url': src,
+                'ext': mimetype2ext(medium.get('type')),
+                'acodec': 'none',
+                'vcodec': 'none',
+                'width': int_or_none(medium.get('width')),
+                'height': int_or_none(medium.get('height')),
+                'format_note': 'SMIL storyboards',
+            })
+
          return formats
  
      def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py

index 1bc0ac76714a254d07923f6b2706ed8ac2e0b07f..7a77edf4c30c98f57b1c739efe39ef77a9e4b252 100644 (file)
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -4546,20 +4546,24 @@ def mimetype2ext(mt):
      if mt is None:
          return None
  
-    ext = {
+    mt, _, params = mt.partition(';')
+    mt = mt.strip()
+
+    FULL_MAP = {
          'audio/mp4': 'm4a',
          # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
          # it's the most popular one
          'audio/mpeg': 'mp3',
          'audio/x-wav': 'wav',
-    }.get(mt)
+        'audio/wav': 'wav',
+        'audio/wave': 'wav',
+    }
+
+    ext = FULL_MAP.get(mt)
      if ext is not None:
          return ext
  
-    _, _, res = mt.rpartition('/')
-    res = res.split(';')[0].strip().lower()
-
-    return {
+    SUBTYPE_MAP = {
          '3gpp': '3gp',
          'smptett+xml': 'tt',
          'ttaf+xml': 'dfxp',
@@ -4578,7 +4582,28 @@ def mimetype2ext(mt):
          'quicktime': 'mov',
          'mp2t': 'ts',
          'x-wav': 'wav',
-    }.get(res, res)
+        'filmstrip+json': 'fs',
+        'svg+xml': 'svg',
+    }
+
+    _, _, subtype = mt.rpartition('/')
+    ext = SUBTYPE_MAP.get(subtype.lower())
+    if ext is not None:
+        return ext
+
+    SUFFIX_MAP = {
+        'json': 'json',
+        'xml': 'xml',
+        'zip': 'zip',
+        'gzip': 'gz',
+    }
+
+    _, _, suffix = subtype.partition('+')
+    ext = SUFFIX_MAP.get(suffix)
+    if ext is not None:
+        return ext
+
+    return subtype.replace('+', '.')
  
  
  def parse_codecs(codecs_str):
author	Felix S <redacted>
	Sat, 2 Oct 2021 18:43:42 +0000 (18:43 +0000)
committer	GitHub <redacted>
	Sat, 2 Oct 2021 18:43:42 +0000 (00:13 +0530)
yt_dlp/YoutubeDL.py		patch | blob | blame | history
yt_dlp/extractor/common.py		patch | blob | blame | history
yt_dlp/utils.py		patch | blob | blame | history