[extractor/generic] Add extractor-args `hls_key`, `variant_query` (#6567)

[yt-dlp.git] / yt_dlp / extractor / generic.py
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py

index 49aa5a1f5c96c44584c590f32afcd678858a7da3..075bb36ded8ef7f583898bc6d5348f6c78fcd0dd 100644 (file)
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -24,6 +24,7 @@
      mimetype2ext,
      orderedSet,
      parse_duration,
+    parse_qs,
      parse_resolution,
      smuggle_url,
      str_or_none,
@@ -32,6 +33,7 @@
      unescapeHTML,
      unified_timestamp,
      unsmuggle_url,
+    update_url_query,
      url_or_none,
      urljoin,
      variadic,
@@ -2184,12 +2186,21 @@ def report_detected(self, name, num=1, note=None):
  
          self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')
  
-    def _fragment_query(self, url):
+    def _extra_manifest_info(self, info, manifest_url):
          if self._configuration_arg('fragment_query'):
-            query_string = urllib.parse.urlparse(url).query
+            query_string = urllib.parse.urlparse(manifest_url).query
              if query_string:
-                return {'extra_param_to_segment_url': query_string}
-        return {}
+                info['extra_param_to_segment_url'] = query_string
+
+        hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None
+        info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key'), {
+            'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
+        }) or None
+
+        if self._configuration_arg('variant_query'):
+            query = parse_qs(manifest_url)
+            for fmt in self._downloader._get_formats(info):
+                fmt['url'] = update_url_query(fmt['url'], query)
  
      def _extract_rss(self, url, video_id, doc):
          NS_MAP = {
@@ -2397,10 +2408,8 @@ def _real_extract(self, url):
              subtitles = {}
              if format_id.endswith('mpegurl') or ext == 'm3u8':
                  formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
-                info_dict.update(self._fragment_query(url))
              elif format_id.endswith('mpd') or format_id.endswith('dash+xml') or ext == 'mpd':
                  formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
-                info_dict.update(self._fragment_query(url))
              elif format_id == 'f4m' or ext == 'f4m':
                  formats = self._extract_f4m_formats(url, video_id, headers=headers)
              else:
@@ -2415,6 +2424,7 @@ def _real_extract(self, url):
                  'subtitles': subtitles,
                  'http_headers': headers or None,
              })
+            self._extra_manifest_info(info_dict, url)
              return info_dict
  
          if not self.get_param('test', False) and not is_intentional:
@@ -2427,7 +2437,7 @@ def _real_extract(self, url):
          if first_bytes.startswith(b'#EXTM3U'):
              self.report_detected('M3U playlist')
              info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
-            info_dict.update(self._fragment_query(url))
+            self._extra_manifest_info(info_dict, url)
              return info_dict
  
          # Maybe it's a direct link to a video?
@@ -2478,7 +2488,7 @@ def _real_extract(self, url):
                      doc,
                      mpd_base_url=full_response.geturl().rpartition('/')[0],
                      mpd_url=url)
-                info_dict.update(self._fragment_query(url))
+                self._extra_manifest_info(info_dict, url)
                  self.report_detected('DASH manifest')
                  return info_dict
              elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
@@ -2592,7 +2602,7 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
                      formats.extend(fmts)
                      self._merge_subtitles(subs, target=subtitles)
                  for fmt in formats:
-                    fmt.update(self._fragment_query(src))
+                    self._extra_manifest_info(fmt, src)
  
                  if not formats:
                      formats.append({
@@ -2795,10 +2805,10 @@ def filter_video(urls):
                  return [self._extract_xspf_playlist(video_url, video_id)]
              elif ext == 'm3u8':
                  entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
-                entry_info_dict.update(self._fragment_query(video_url))
+                self._extra_manifest_info(entry_info_dict, video_url)
              elif ext == 'mpd':
                  entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
-                entry_info_dict.update(self._fragment_query(video_url))
+                self._extra_manifest_info(entry_info_dict, video_url)
              elif ext == 'f4m':
                  entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
              elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url: