jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/sina.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / sina.py
index d30d57d858355fdbbe060d05627bdb3b8acad28f..974af1bc53f1b5d90a7a759a37a88ec1eb85ccbe 100644 (file)
@@ -1,17 +1,17 @@
 from .common import InfoExtractor
+from ..networking import HEADRequest
 from ..utils import (
-    HEADRequest,
     ExtractorError,
+    clean_html,
+    get_element_by_attribute,
     int_or_none,
-    update_url_query,
     qualities,
-    get_element_by_attribute,
-    clean_html,
+    update_url_query,
 )
 
 
 class SinaIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/
+    _VALID_URL = r'''(?x)https?://(?:[^/?#]+\.)?video\.sina\.com\.cn/
                         (?:
                             (?:view/|.*\#)(?P<id>\d+)|
                             .+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)|
@@ -28,7 +28,7 @@ class SinaIE(InfoExtractor):
                 'id': '250576622',
                 'ext': 'mp4',
                 'title': '现场:克鲁兹宣布退选 特朗普将稳获提名',
-            }
+            },
         },
         {
             'url': 'http://video.sina.com.cn/v/b/101314253-1290078633.html',
@@ -60,14 +60,13 @@ def _real_extract(self, url):
                 self.to_screen('Getting video id')
                 request = HEADRequest(url)
                 _, urlh = self._download_webpage_handle(request, 'NA', False)
-                return self._real_extract(urlh.geturl())
+                return self._real_extract(urlh.url)
             else:
                 pseudo_id = mobj.group('pseudo_id')
                 webpage = self._download_webpage(url, pseudo_id)
                 error = get_element_by_attribute('class', 'errtitle', webpage)
                 if error:
-                    raise ExtractorError('%s said: %s' % (
-                        self.IE_NAME, clean_html(error)), expected=True)
+                    raise ExtractorError(f'{self.IE_NAME} said: {clean_html(error)}', expected=True)
                 video_id = self._search_regex(
                     r"video_id\s*:\s*'(\d+)'", webpage, 'video id')
 
@@ -75,7 +74,7 @@ def _real_extract(self, url):
             'http://s.video.sina.com.cn/video/h5play',
             video_id, query={'video_id': video_id})
         if video_data['code'] != 1:
-            raise ExtractorError('%s said: %s' % (
+            raise ExtractorError('{} said: {}'.format(
                 self.IE_NAME, video_data['message']), expected=True)
         else:
             video_data = video_data['data']
@@ -97,7 +96,6 @@ def _real_extract(self, url):
                     'quality': preference(quality_id),
                     'ext': 'mp4',
                 })
-            self._sort_formats(formats)
 
             return {
                 'id': video_id,