jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/sina.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / sina.py
index 4083114184aa50bcd3fcb691bb7513a520450ac0..eeb9ebb44c9fa377b3691c920b89c862e0b0d3a7 100644 (file)
@@ -1,22 +1,17 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
 from .common import InfoExtractor
+from ..networking import HEADRequest
 from ..utils import (
-    HEADRequest,
     ExtractorError,
+    clean_html,
+    get_element_by_attribute,
     int_or_none,
-    update_url_query,
     qualities,
-    get_element_by_attribute,
-    clean_html,
+    update_url_query,
 )
 
 
 class SinaIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/
+    _VALID_URL = r'''(?x)https?://(?:[^/?#]+\.)?video\.sina\.com\.cn/
                         (?:
                             (?:view/|.*\#)(?P<id>\d+)|
                             .+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)|
@@ -56,7 +51,7 @@ class SinaIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        mobj = self._match_valid_url(url)
 
         video_id = mobj.group('id')
         if not video_id:
@@ -65,7 +60,7 @@ def _real_extract(self, url):
                 self.to_screen('Getting video id')
                 request = HEADRequest(url)
                 _, urlh = self._download_webpage_handle(request, 'NA', False)
-                return self._real_extract(urlh.geturl())
+                return self._real_extract(urlh.url)
             else:
                 pseudo_id = mobj.group('pseudo_id')
                 webpage = self._download_webpage(url, pseudo_id)
@@ -102,7 +97,6 @@ def _real_extract(self, url):
                     'quality': preference(quality_id),
                     'ext': 'mp4',
                 })
-            self._sort_formats(formats)
 
             return {
                 'id': video_id,