jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/iprima.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / iprima.py
index e58e9c2ee126eb280c66860d0cb049e569571a12..ab26dc5efeaad450cee2ef5f3a7b691c85377916 100644 (file)
@@ -3,12 +3,12 @@
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     determine_ext,
     js_to_json,
-    urlencode_postdata,
-    ExtractorError,
     parse_qs,
-    traverse_obj
+    traverse_obj,
+    urlencode_postdata,
 )
 
 
@@ -81,7 +81,7 @@ def _perform_login(self, username, password):
             note='Logging in')
 
         # a profile may need to be selected first, even when there is only a single one
-        if '/profile-select' in login_handle.geturl():
+        if '/profile-select' in login_handle.url:
             profile_id = self._search_regex(
                 r'data-identifier\s*=\s*["\']?(\w+)', profile_select_html, 'profile id')
 
@@ -89,7 +89,7 @@ def _perform_login(self, username, password):
                 f'{self._AUTH_ROOT}/user/profile-select-perform/{profile_id}', None,
                 query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile')
 
-        code = traverse_obj(login_handle.geturl(), ({parse_qs}, 'code', 0))
+        code = traverse_obj(login_handle.url, ({parse_qs}, 'code', 0))
         if not code:
             raise ExtractorError('Login failed', expected=True)
 
@@ -134,10 +134,17 @@ def _real_extract(self, url):
         ), webpage, 'real id', group='id', default=None)
 
         if not video_id:
-            nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data')
+            nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data', fatal=False)
             video_id = traverse_obj(
                 nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False)
 
+        if not video_id:
+            nuxt_data = self._search_json(
+                r'<script[^>]+\bid=["\']__NUXT_DATA__["\'][^>]*>',
+                webpage, 'nuxt data', None, end_pattern=r'</script>', contains_pattern=r'\[(?s:.+)\]')
+
+            video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False)
+
         if not video_id:
             self.raise_no_formats('Unable to extract video ID from webpage')
 
@@ -193,8 +200,8 @@ class IPrimaCNNIE(InfoExtractor):
             'title': 'md5:277c6b1ed0577e51b40ddd35602ff43e',
         },
         'params': {
-            'skip_download': 'm3u8'
-        }
+            'skip_download': 'm3u8',
+        },
     }]
 
     def _real_extract(self, url):