jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/nova.py
[npr] Make SMIL extraction non-fatal (#2099)
[yt-dlp.git] / yt_dlp / extractor / nova.py
index 3acb8812176dcfa4a262aea967753953040f724f..00a64f88d1e81c8b9e5038a922d54c42f0927f2b 100644 (file)
@@ -10,6 +10,7 @@
     int_or_none,
     js_to_json,
     qualities,
+    traverse_obj,
     unified_strdate,
     url_or_none,
 )
 
 class NovaEmbedIE(InfoExtractor):
     _VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
-        'md5': 'ee009bafcc794541570edd44b71cbea3',
         'info_dict': {
             'id': '8o0n0r',
-            'ext': 'mp4',
             'title': '2180. díl',
             'thumbnail': r're:^https?://.*\.jpg',
             'duration': 2578,
         },
-    }
+        'params': {
+            'skip_download': True,
+            'ignore_no_formats_error': True,
+        },
+        'expected_warnings': ['DRM protected', 'Requested format is not available'],
+    }, {
+        'url': 'https://media.cms.nova.cz/embed/KybpWYvcgOa',
+        'info_dict': {
+            'id': 'KybpWYvcgOa',
+            'ext': 'mp4',
+            'title': 'Borhyová oslavila 60? Soutěžící z pořadu odboural moderátora Ondřeje Sokola',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 114,
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
+        has_drm = False
         duration = None
         formats = []
 
         player = self._parse_json(
             self._search_regex(
-                r'Player\.init\s*\([^,]+,\s*(?:\w+\s*\?\s*{.+?}\s*:\s*)?({.+})\s*,\s*{.+?}\s*\)\s*;',
-                webpage, 'player', default='{}'), video_id, fatal=False)
+                (r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
+                    r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
+                webpage, 'player', default='{}', group='json'), video_id, fatal=False)
         if player:
             for format_id, format_list in player['tracks'].items():
                 if not isinstance(format_list, list):
@@ -48,6 +64,10 @@ def _real_extract(self, url):
                 for format_dict in format_list:
                     if not isinstance(format_dict, dict):
                         continue
+                    if (not self.get_param('allow_unplayable_formats')
+                            and traverse_obj(format_dict, ('drm', 'keySystem'))):
+                        has_drm = True
+                        continue
                     format_url = url_or_none(format_dict.get('src'))
                     format_type = format_dict.get('type')
                     ext = determine_ext(format_url)
@@ -104,6 +124,8 @@ def _real_extract(self, url):
                     f['format_id'] = f_id
                     formats.append(f)
 
+        if not formats and has_drm:
+            self.report_drm(video_id)
         self._sort_formats(formats)
 
         title = self._og_search_title(