[extractor/hotstar] Improve format metadata

author pukkandan <redacted>
Fri, 9 Dec 2022 09:47:51 +0000 (15:17 +0530)
committer pukkandan <redacted>
Fri, 9 Dec 2022 09:53:59 +0000 (15:23 +0530)
diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py

index 8725c9436f088ad1fe13db868426e2b7e64d7a89..cea1812f15cd18bad4a00005df081fc99f6c079b 100644 (file)
--- a/yt_dlp/extractor/hotstar.py
+++ b/yt_dlp/extractor/hotstar.py
@@ -148,6 +148,12 @@ class HotStarIE(HotStarBaseIE):
          'dr': 'dynamic_range',
      }
  
+    _TAG_FIELDS = {
+        'language': 'language',
+        'acodec': 'audio_codec',
+        'vcodec': 'video_codec',
+    }
+
      @classmethod
      def _video_url(cls, video_id, video_type=None, *, slug='ignore_me', root=None):
          assert None in (video_type, root)
@@ -182,24 +188,22 @@ def _real_extract(self, url):
                     for key, prefix in self._IGNORE_MAP.items()
                     for ignore in self._configuration_arg(key)):
                  continue
+            tag_dict = dict((t.split(':', 1) + [None])[:2] for t in tags.split(';'))
  
              format_url = url_or_none(playback_set.get('playbackUrl'))
              if not format_url:
                  continue
              format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', format_url)
-            dr = re.search(r'dynamic_range:(?P<dr>[a-z]+)', playback_set.get('tagsCombination')).group('dr')
              ext = determine_ext(format_url)
  
              current_formats, current_subs = [], {}
              try:
                  if 'package:hls' in tags or ext == 'm3u8':
                      current_formats, current_subs = self._extract_m3u8_formats_and_subtitles(
-                        format_url, video_id, 'mp4',
-                        entry_protocol='m3u8_native',
-                        m3u8_id=f'{dr}-hls', headers=headers)
+                        format_url, video_id, ext='mp4', headers=headers)
                  elif 'package:dash' in tags or ext == 'mpd':
                      current_formats, current_subs = self._extract_mpd_formats_and_subtitles(
-                        format_url, video_id, mpd_id=f'{dr}-dash', headers=headers)
+                        format_url, video_id, headers=headers)
                  elif ext == 'f4m':
                      pass  # XXX: produce broken files
                  else:
@@ -213,20 +217,32 @@ def _real_extract(self, url):
                      geo_restricted = True
                  continue
  
-            if tags and 'encryption:plain' not in tags:
+            if tag_dict.get('encryption') not in ('plain', None):
                  for f in current_formats:
                      f['has_drm'] = True
-            if tags and 'language' in tags:
-                lang = re.search(r'language:(?P<lang>[a-z]+)', tags).group('lang')
-                for f in current_formats:
-                    if not f.get('langauge'):
-                        f['language'] = lang
+            for f in current_formats:
+                for k, v in self._TAG_FIELDS.items():
+                    if not f.get(k):
+                        f[k] = tag_dict.get(v)
+                if f.get('vcodec') != 'none' and not f.get('dynamic_range'):
+                    f['dynamic_range'] = tag_dict.get('dynamic_range')
+                if f.get('acodec') != 'none' and not f.get('audio_channels'):
+                    f['audio_channels'] = {
+                        'stereo': 2,
+                        'dolby51': 6,
+                    }.get(tag_dict.get('audio_channel'))
+                f['format_note'] = join_nonempty(
+                    tag_dict.get('ladder'),
+                    tag_dict.get('audio_channel') if f.get('acodec') != 'none' else None,
+                    f.get('format_note'),
+                    delim=', ')
  
              formats.extend(current_formats)
              subs = self._merge_subtitles(subs, current_subs)
  
          if not formats and geo_restricted:
              self.raise_geo_restricted(countries=['IN'], metadata_available=True)
+        self._remove_duplicate_formats(formats)
          for f in formats:
              f.setdefault('http_headers', {}).update(headers)
  
@@ -235,7 +251,7 @@ def _real_extract(self, url):
              'title': video_data.get('title'),
              'description': video_data.get('description'),
              'duration': int_or_none(video_data.get('duration')),
-            'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
+            'timestamp': int_or_none(traverse_obj(video_data, 'broadcastDate', 'startDate')),
              'formats': formats,
              'subtitles': subs,
              'channel': video_data.get('channelName'),
author	pukkandan <redacted>
	Fri, 9 Dec 2022 09:47:51 +0000 (15:17 +0530)
committer	pukkandan <redacted>
	Fri, 9 Dec 2022 09:53:59 +0000 (15:23 +0530)