]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/mediaset.py
[ie/mlbtv] Fix extraction (#10296)
[yt-dlp.git] / yt_dlp / extractor / mediaset.py
index e3b728dcae20e827e8a3c07710eb5b50a9882099..8cb18e6096f3af2c95e0d6a16c6fc35c82fa694d 100644 (file)
@@ -5,11 +5,11 @@
 from ..utils import (
     ExtractorError,
     GeoRestrictedError,
-    int_or_none,
     OnDemandPagedList,
+    int_or_none,
     try_get,
-    urljoin,
     update_url_query,
+    urljoin,
 )
 
 
@@ -29,7 +29,7 @@ class MediasetIE(ThePlatformBaseIE):
                     '''
 
     _EMBED_REGEX = [
-        rf'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//(?:\w+\.)+mediaset\.it/player/(?:v\d+/)?index\.html\?\S*?programGuid={_GUID_RE})[\'"&]'
+        rf'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//(?:\w+\.)+mediaset\.it/player/(?:v\d+/)?index\.html\?\S*?programGuid={_GUID_RE})[\'"&]',
     ]
     _TESTS = [{
         # full episode
@@ -73,6 +73,7 @@ class MediasetIE(ThePlatformBaseIE):
             'season_number': 5,
             'episode_number': 5,
             'chapters': [{'start_time': 0.0, 'end_time': 3409.08}, {'start_time': 3409.08, 'end_time': 6565.008}],
+            'categories': ['Informazione'],
         },
     }, {
         # DRM
@@ -127,7 +128,8 @@ class MediasetIE(ThePlatformBaseIE):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'skip': 'Dead link',
     }, {
         # WittyTV embed
         'url': 'https://www.wittytv.it/mauriziocostanzoshow/ultima-puntata-venerdi-25-novembre/',
@@ -148,17 +150,18 @@ class MediasetIE(ThePlatformBaseIE):
             'season_number': 12,
             'episode': 'Episode 8',
             'episode_number': 8,
+            'categories': ['Intrattenimento'],
         },
         'params': {
             'skip_download': True,
-        }
+        },
     }]
 
     def _parse_smil_formats_and_subtitles(
             self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
         for video in smil.findall(self._xpath_ns('.//video', namespace)):
             video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src'])
-        return super(MediasetIE, self)._parse_smil_formats_and_subtitles(
+        return super()._parse_smil_formats_and_subtitles(
             smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
 
     def _check_drm_formats(self, tp_formats, video_id):