jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/glomex.py
[extractors] Use new framework for existing embeds (#4307)
[yt-dlp.git] / yt_dlp / extractor / glomex.py
index 85ffa4c0524f7237903da1182ab5ee0d88bc4125..86fe1b0243e012b78eef9396714f5d4679dbcda9 100644 (file)
@@ -174,7 +174,7 @@ def build_player_url(cls, video_id, integration, origin_url=None):
         return cls._smuggle_origin_url(f'https:{cls._BASE_PLAYER_URL}?{query_string}', origin_url)
 
     @classmethod
-    def _extract_urls(cls, webpage, origin_url):
+    def _extract_embed_urls(cls, url, webpage):
         # https://docs.glomex.com/publisher/video-player-integration/javascript-api/
         quot_re = r'["\']'
 
@@ -183,9 +183,9 @@ def _extract_urls(cls, webpage, origin_url):
                 (?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=q)).)+
             )(?P=q)'''
         for mobj in re.finditer(regex, webpage):
-            url = unescapeHTML(mobj.group('url'))
-            if cls.suitable(url):
-                yield cls._smuggle_origin_url(url, origin_url)
+            embed_url = unescapeHTML(mobj.group('url'))
+            if cls.suitable(embed_url):
+                yield cls._smuggle_origin_url(embed_url, url)
 
         regex = fr'''(?x)
             <glomex-player [^>]+?>|
@@ -193,7 +193,7 @@ def _extract_urls(cls, webpage, origin_url):
         for mobj in re.finditer(regex, webpage):
             attrs = extract_attributes(mobj.group(0))
             if attrs.get('data-integration-id') and attrs.get('data-playlist-id'):
-                yield cls.build_player_url(attrs['data-playlist-id'], attrs['data-integration-id'], origin_url)
+                yield cls.build_player_url(attrs['data-playlist-id'], attrs['data-integration-id'], url)
 
         # naive parsing of inline scripts for hard-coded integration parameters
         regex = fr'''(?x)
@@ -206,7 +206,7 @@ def _extract_urls(cls, webpage, origin_url):
                 continue
             playlist_id = re.search(regex % 'playlistId', script)
             if playlist_id:
-                yield cls.build_player_url(playlist_id, integration_id, origin_url)
+                yield cls.build_player_url(playlist_id, integration_id, url)
 
     def _real_extract(self, url):
         url, origin_url = self._unsmuggle_origin_url(url)