jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/cctv.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / cctv.py
index 466bdfb7cc635e0b0e08beecd598a2318d80c12e..18c080df1bc0efc486c76ff17db71afdcfc7bf30 100644 (file)
@@ -1,7 +1,6 @@
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     float_or_none,
     try_get,
@@ -88,6 +87,20 @@ class CCTVIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # videoCenterId: "id"
+        'url': 'http://news.cctv.com/2024/02/21/ARTIcU5tKIOIF2myEGCATkLo240221.shtml',
+        'info_dict': {
+            'id': '5c846c0518444308ba32c4159df3b3e0',
+            'ext': 'mp4',
+            'title': '《平“语”近人——习近平喜欢的典故》第三季 第5集:风物长宜放眼量',
+            'uploader': 'yangjuan',
+            'timestamp': 1708554940,
+            'upload_date': '20240221',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         # var ids = ["id"]
         'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
@@ -128,7 +141,7 @@ def _real_extract(self, url):
 
         video_id = self._search_regex(
             [r'var\s+guid\s*=\s*["\']([\da-fA-F]+)',
-             r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)',
+             r'videoCenterId(?:["\']\s*,|:)\s*["\']([\da-fA-F]+)',
              r'changePlayer\s*\(\s*["\']([\da-fA-F]+)',
              r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)',
              r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)',
@@ -153,17 +166,17 @@ def _real_extract(self, url):
         if isinstance(video, dict):
             for quality, chapters_key in enumerate(('lowChapters', 'chapters')):
                 video_url = try_get(
-                    video, lambda x: x[chapters_key][0]['url'], compat_str)
+                    video, lambda x: x[chapters_key][0]['url'], str)
                 if video_url:
                     formats.append({
                         'url': video_url,
                         'format_id': 'http',
                         'quality': quality,
                         # Sample clip
-                        'preference': -10
+                        'preference': -10,
                     })
 
-        hls_url = try_get(data, lambda x: x['hls_url'], compat_str)
+        hls_url = try_get(data, lambda x: x['hls_url'], str)
         if hls_url:
             hls_url = re.sub(r'maxbr=\d+&?', '', hls_url)
             formats.extend(self._extract_m3u8_formats(