]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/seznamzpravy.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / seznamzpravy.py
index 7a1c7e38bec915a31713a98a2cbee234faf0edb1..79e88858356f470e48095b88e1ca7f1159cbcc05 100644 (file)
@@ -1,11 +1,5 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
 from .common import InfoExtractor
 from ..compat import (
-    compat_parse_qs,
     compat_str,
     compat_urllib_parse_urlparse,
 )
@@ -13,6 +7,7 @@
     urljoin,
     int_or_none,
     parse_codecs,
+    parse_qs,
     try_get,
 )
 
@@ -23,6 +18,7 @@ def _raw_id(src_url):
 
 class SeznamZpravyIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?seznamzpravy\.cz/iframe/player\?.*\bsrc='
+    _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?seznamzpravy\.cz/iframe/player\?.*?)\1']
     _TESTS = [{
         'url': 'https://www.seznamzpravy.cz/iframe/player?duration=241&serviceSlug=zpravy&src=https%3A%2F%2Fv39-a.sdn.szn.cz%2Fv_39%2Fvmd%2F5999c902ea707c67d8e267a9%3Ffl%3Dmdk%2C432f65a0%7C&itemType=video&autoPlay=false&title=Sv%C4%9Bt%20bez%20obalu%3A%20%C4%8Ce%C5%A1t%C3%AD%20voj%C3%A1ci%20na%20mis%C3%ADch%20(kr%C3%A1tk%C3%A1%20verze)&series=Sv%C4%9Bt%20bez%20obalu&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_F_I%2FR5puJ.jpeg%3Ffl%3Dcro%2C0%2C0%2C1920%2C1080%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=1920&height=1080&cutFrom=0&cutTo=0&splVersion=VOD&contentId=170889&contextId=35990&showAdvert=true&collocation=&autoplayPossible=true&embed=&isVideoTooShortForPreroll=false&isVideoTooLongForPostroll=true&videoCommentOpKey=&videoCommentId=&version=4.0.76&dotService=zpravy&gemiusPrismIdentifier=bVc1ZIb_Qax4W2v5xOPGpMeCP31kFfrTzj0SqPTLh_b.Z7&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy',
         'info_dict': {
@@ -51,13 +47,6 @@ class SeznamZpravyIE(InfoExtractor):
         },
     }]
 
-    @staticmethod
-    def _extract_urls(webpage):
-        return [
-            mobj.group('url') for mobj in re.finditer(
-                r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?seznamzpravy\.cz/iframe/player\?.*?)\1',
-                webpage)]
-
     def _extract_sdn_formats(self, sdn_url, video_id):
         sdn_data = self._download_json(sdn_url, video_id)
 
@@ -104,11 +93,10 @@ def get_url(format_id):
                 urljoin(sdn_url, hls_rel_url), video_id, ext='mp4',
                 m3u8_id='hls', fatal=False))
 
-        self._sort_formats(formats)
         return formats
 
     def _real_extract(self, url):
-        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        params = parse_qs(url)
 
         src = params['src'][0]
         title = params['title'][0]
@@ -165,5 +153,5 @@ def _real_extract(self, url):
 
         return self.playlist_result([
             self.url_result(entry_url, ie=SeznamZpravyIE.ie_key())
-            for entry_url in SeznamZpravyIE._extract_urls(webpage)],
+            for entry_url in SeznamZpravyIE._extract_embed_urls(url, webpage)],
             article_id, title, description)