[ie] Add `media_type` field

[yt-dlp.git] / yt_dlp / extractor / common.py
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index b3a45b3fb578b85d30fac977ed2b8ec82088d059..af534775f0f541fa745f8e7995bf07df00b8f666 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -286,6 +286,9 @@ class InfoExtractor:
                      If it is not clear whether to use timestamp or this, use the former
      release_date:   The date (YYYYMMDD) when the video was released in UTC.
                      If not explicitly set, calculated from release_timestamp
+    release_year:   Year (YYYY) as an integer when the video or album was released.
+                    To be used if no exact release date is known.
+                    If not explicitly set, calculated from release_date.
      modified_timestamp: UNIX timestamp of the moment the video was last modified.
      modified_date:   The date (YYYYMMDD) when the video was last modified in UTC.
                      If not explicitly set, calculated from modified_timestamp
@@ -379,6 +382,7 @@ class InfoExtractor:
                      'private', 'premium_only', 'subscriber_only', 'needs_auth',
                      'unlisted' or 'public'. Use 'InfoExtractor._availability'
                      to set it
+    media_type:     The type of media as classified by the site, e.g. "episode", "clip", "trailer"
      _old_archive_ids: A list of old archive ids needed for backward compatibility
      _format_sort_fields: A list of fields to use for sorting formats
      __post_extractor: A function to be called just before the metadata is
@@ -427,7 +431,6 @@ class InfoExtractor:
                      and compilations).
      disc_number:    Number of the disc or other physical medium the track belongs to,
                      as an integer.
-    release_year:   Year (YYYY) when the album was released.
      composer:       Composer of the piece
  
      The following fields should only be set for clips that should be cut from the original video:
@@ -2341,7 +2344,9 @@ def _parse_smil_formats_and_subtitles(
          imgs_count = 0
  
          srcs = set()
-        media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
+        media = itertools.chain.from_iterable(
+            smil.findall(self._xpath_ns(arg, namespace))
+            for arg in ['.//video', './/audio', './/media'])
          for medium in media:
              src = medium.get('src')
              if not src or src in srcs: