[ie/matchtv] Fix extractor (#10190)

[yt-dlp.git] / yt_dlp / extractor / sina.py
diff --git a/yt_dlp/extractor/sina.py b/yt_dlp/extractor/sina.py

index d30d57d858355fdbbe060d05627bdb3b8acad28f..974af1bc53f1b5d90a7a759a37a88ec1eb85ccbe 100644 (file)
--- a/yt_dlp/extractor/sina.py
+++ b/yt_dlp/extractor/sina.py
@@ -1,17 +1,17 @@
  from .common import InfoExtractor
+from ..networking import HEADRequest
  from ..utils import (
-    HEADRequest,
      ExtractorError,
+    clean_html,
+    get_element_by_attribute,
      int_or_none,
-    update_url_query,
      qualities,
-    get_element_by_attribute,
-    clean_html,
+    update_url_query,
  )
  
  
  class SinaIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/
+    _VALID_URL = r'''(?x)https?://(?:[^/?#]+\.)?video\.sina\.com\.cn/
                          (?:
                              (?:view/|.*\#)(?P<id>\d+)|
                              .+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)|
@@ -28,7 +28,7 @@ class SinaIE(InfoExtractor):
                  'id': '250576622',
                  'ext': 'mp4',
                  'title': '现场:克鲁兹宣布退选 特朗普将稳获提名',
-            }
+            },
          },
          {
              'url': 'http://video.sina.com.cn/v/b/101314253-1290078633.html',
@@ -60,14 +60,13 @@ def _real_extract(self, url):
                  self.to_screen('Getting video id')
                  request = HEADRequest(url)
                  _, urlh = self._download_webpage_handle(request, 'NA', False)
-                return self._real_extract(urlh.geturl())
+                return self._real_extract(urlh.url)
              else:
                  pseudo_id = mobj.group('pseudo_id')
                  webpage = self._download_webpage(url, pseudo_id)
                  error = get_element_by_attribute('class', 'errtitle', webpage)
                  if error:
-                    raise ExtractorError('%s said: %s' % (
-                        self.IE_NAME, clean_html(error)), expected=True)
+                    raise ExtractorError(f'{self.IE_NAME} said: {clean_html(error)}', expected=True)
                  video_id = self._search_regex(
                      r"video_id\s*:\s*'(\d+)'", webpage, 'video id')
  
@@ -75,7 +74,7 @@ def _real_extract(self, url):
              'http://s.video.sina.com.cn/video/h5play',
              video_id, query={'video_id': video_id})
          if video_data['code'] != 1:
-            raise ExtractorError('%s said: %s' % (
+            raise ExtractorError('{} said: {}'.format(
                  self.IE_NAME, video_data['message']), expected=True)
          else:
              video_data = video_data['data']
@@ -97,7 +96,6 @@ def _real_extract(self, url):
                      'quality': preference(quality_id),
                      'ext': 'mp4',
                  })
-            self._sort_formats(formats)
  
              return {
                  'id': video_id,