]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/generic.py
[extractor/generic] Add `fragment_query` extractor arg for DASH and HLS (#5528)
[yt-dlp.git] / yt_dlp / extractor / generic.py
index 5da77273d804886f5ad3f73979112b895f6628ef..2fcbc6f43f830c68f51547b95d7de9952f89b43a 100644 (file)
@@ -2189,6 +2189,13 @@ def report_detected(self, name, num=1, note=None):
 
         self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')
 
+    def _fragment_query(self, url):
+        if self._configuration_arg('fragment_query'):
+            query_string = urllib.parse.urlparse(url).query
+            if query_string:
+                return {'extra_param_to_segment_url': query_string}
+        return {}
+
     def _extract_rss(self, url, video_id, doc):
         NS_MAP = {
             'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
@@ -2351,8 +2358,10 @@ def _real_extract(self, url):
             subtitles = {}
             if format_id.endswith('mpegurl'):
                 formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
+                info_dict.update(self._fragment_query(url))
             elif format_id.endswith('mpd') or format_id.endswith('dash+xml'):
                 formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
+                info_dict.update(self._fragment_query(url))
             elif format_id == 'f4m':
                 formats = self._extract_f4m_formats(url, video_id, headers=headers)
             else:
@@ -2379,6 +2388,7 @@ def _real_extract(self, url):
         if first_bytes.startswith(b'#EXTM3U'):
             self.report_detected('M3U playlist')
             info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
+            info_dict.update(self._fragment_query(url))
             return info_dict
 
         # Maybe it's a direct link to a video?
@@ -2429,6 +2439,7 @@ def _real_extract(self, url):
                     doc,
                     mpd_base_url=full_response.geturl().rpartition('/')[0],
                     mpd_url=url)
+                info_dict.update(self._fragment_query(url))
                 self.report_detected('DASH manifest')
                 return info_dict
             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
@@ -2541,7 +2552,10 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
                         m3u8_id='hls', fatal=False)
                     formats.extend(fmts)
                     self._merge_subtitles(subs, target=subtitles)
-                else:
+                for fmt in formats:
+                    fmt.update(self._fragment_query(src))
+
+                if not formats:
                     formats.append({
                         'url': src,
                         'ext': (mimetype2ext(src_type)
@@ -2776,8 +2790,10 @@ def filter_video(urls):
                 return [self._extract_xspf_playlist(video_url, video_id)]
             elif ext == 'm3u8':
                 entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
+                entry_info_dict.update(self._fragment_query(video_url))
             elif ext == 'mpd':
                 entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
+                entry_info_dict.update(self._fragment_query(video_url))
             elif ext == 'f4m':
                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
             elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url: