[ie/matchtv] Fix extractor (#10190)

[yt-dlp.git] / yt_dlp / extractor / glomex.py
diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py

index d9ef4338fc1a2eedcab8e4d7bd84f5d988dd3755..35ffad56c2c56ef18f8d295a697e7c9b63ea345f 100644 (file)
--- a/yt_dlp/extractor/glomex.py
+++ b/yt_dlp/extractor/glomex.py
@@ -1,14 +1,11 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
  import re
  import urllib.parse
  
  from .common import InfoExtractor
  from ..utils import (
+    ExtractorError,
      determine_ext,
      extract_attributes,
-    ExtractorError,
      int_or_none,
      parse_qs,
      smuggle_url,
@@ -52,15 +49,15 @@ def _download_api_data(self, video_id, integration, current_url=None):
          video_id_type = self._get_videoid_type(video_id)
          return self._download_json(
              self._API_URL,
-            video_id, 'Downloading %s JSON' % video_id_type,
-            'Unable to download %s JSON' % video_id_type,
+            video_id, f'Downloading {video_id_type} JSON',
+            f'Unable to download {video_id_type} JSON',
              query=query)
  
      def _download_and_extract_api_data(self, video_id, integration, current_url):
          api_data = self._download_api_data(video_id, integration, current_url)
          videos = api_data['videos']
          if not videos:
-            raise ExtractorError('no videos found for %s' % video_id)
+            raise ExtractorError(f'no videos found for {video_id}')
          videos = [self._extract_api_data(video, video_id) for video in videos]
          return videos[0] if len(videos) == 1 else self.playlist_result(videos, video_id)
  
@@ -85,7 +82,6 @@ def _extract_api_data(self, video, video_id):
          if video.get('language'):
              for fmt in formats:
                  fmt['language'] = video['language']
-        self._sort_formats(formats)
  
          images = (video.get('images') or []) + [video.get('image') or {}]
          thumbnails = [{
@@ -177,7 +173,7 @@ def build_player_url(cls, video_id, integration, origin_url=None):
          return cls._smuggle_origin_url(f'https:{cls._BASE_PLAYER_URL}?{query_string}', origin_url)
  
      @classmethod
-    def _extract_urls(cls, webpage, origin_url):
+    def _extract_embed_urls(cls, url, webpage):
          # https://docs.glomex.com/publisher/video-player-integration/javascript-api/
          quot_re = r'["\']'
  
@@ -186,9 +182,9 @@ def _extract_urls(cls, webpage, origin_url):
                  (?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=q)).)+
              )(?P=q)'''
          for mobj in re.finditer(regex, webpage):
-            url = unescapeHTML(mobj.group('url'))
-            if cls.suitable(url):
-                yield cls._smuggle_origin_url(url, origin_url)
+            embed_url = unescapeHTML(mobj.group('url'))
+            if cls.suitable(embed_url):
+                yield cls._smuggle_origin_url(embed_url, url)
  
          regex = fr'''(?x)
              <glomex-player [^>]+?>|
@@ -196,7 +192,7 @@ def _extract_urls(cls, webpage, origin_url):
          for mobj in re.finditer(regex, webpage):
              attrs = extract_attributes(mobj.group(0))
              if attrs.get('data-integration-id') and attrs.get('data-playlist-id'):
-                yield cls.build_player_url(attrs['data-playlist-id'], attrs['data-integration-id'], origin_url)
+                yield cls.build_player_url(attrs['data-playlist-id'], attrs['data-integration-id'], url)
  
          # naive parsing of inline scripts for hard-coded integration parameters
          regex = fr'''(?x)
@@ -209,7 +205,7 @@ def _extract_urls(cls, webpage, origin_url):
                  continue
              playlist_id = re.search(regex % 'playlistId', script)
              if playlist_id:
-                yield cls.build_player_url(playlist_id, integration_id, origin_url)
+                yield cls.build_player_url(playlist_id, integration_id, url)
  
      def _real_extract(self, url):
          url, origin_url = self._unsmuggle_origin_url(url)