jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/mediaset.py
[extractor] Deprecate `_sort_formats`
[yt-dlp.git] / yt_dlp / extractor / mediaset.py
index 60c454ddadd7d66aca8c18b04b8618e6092cfaf3..61bdb2a3f1de9d8e66b094f86f6b3973197de2a3 100644 (file)
@@ -20,10 +20,10 @@ class MediasetIE(ThePlatformBaseIE):
                     (?:
                         mediaset:|
                         https?://
-                            (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
+                            (?:\w+\.)+mediaset\.it/
                             (?:
                                 (?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
-                                player/index\.html\?.*?\bprogramGuid=
+                                player/(?:v\d+/)?index\.html\?.*?\bprogramGuid=
                             )
                     )(?P<id>[0-9A-Z]{16,})
                     '''
@@ -141,6 +141,10 @@ class MediasetIE(ThePlatformBaseIE):
         # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
         'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
         'only_matching': True,
+    }, {
+        # embedUrl (from https://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/)
+        'url': 'https://static3.mediasetplay.mediaset.it/player/v2/index.html?partnerId=wittytv&configId=&programGuid=FD00000000153323&autoplay=true&purl=http://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/',
+        'only_matching': True,
     }, {
         'url': 'mediaset:FAFU000000665924',
         'only_matching': True,
@@ -159,36 +163,36 @@ class MediasetIE(ThePlatformBaseIE):
     }, {
         'url': 'https://www.mediasetplay.mediaset.it/movie/herculeslaleggendahainizio/hercules-la-leggenda-ha-inizio_F305927501000102',
         'only_matching': True,
+    }, {
+        'url': 'https://mediasetinfinity.mediaset.it/video/braveandbeautiful/episodio-113_F310948005000402',
+        'only_matching': True,
+    }, {
+        'url': 'https://static3.mediasetplay.mediaset.it/player/v2/index.html?partnerId=wittytv&configId=&programGuid=FD00000000153323',
+        'only_matching': True,
     }]
 
-    @staticmethod
-    def _extract_urls(ie, webpage):
-        def _qs(url):
-            return parse_qs(url)
-
+    def _extract_from_webpage(self, url, webpage):
         def _program_guid(qs):
             return qs.get('programGuid', [None])[0]
 
-        entries = []
         for mobj in re.finditer(
                 r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1',
                 webpage):
             embed_url = mobj.group('url')
-            embed_qs = _qs(embed_url)
+            embed_qs = parse_qs(embed_url)
             program_guid = _program_guid(embed_qs)
             if program_guid:
-                entries.append(embed_url)
+                yield self.url_result(embed_url)
                 continue
+
             video_id = embed_qs.get('id', [None])[0]
             if not video_id:
                 continue
-            urlh = ie._request_webpage(
-                embed_url, video_id, note='Following embed URL redirect')
+            urlh = self._request_webpage(embed_url, video_id, note='Following embed URL redirect')
             embed_url = urlh.geturl()
-            program_guid = _program_guid(_qs(embed_url))
+            program_guid = _program_guid(parse_qs(embed_url))
             if program_guid:
-                entries.append(embed_url)
-        return entries
+                yield self.url_result(embed_url)
 
     def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
         for video in smil.findall(self._xpath_ns('.//video', namespace)):
@@ -243,8 +247,6 @@ def _real_extract(self, url):
         if (first_e or geo_e) and not formats:
             raise geo_e or first_e
 
-        self._sort_formats(formats)
-
         feed_data = self._download_json(
             'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2/guid/-/' + guid,
             guid, fatal=False)
@@ -282,11 +284,11 @@ def _real_extract(self, url):
         return info
 
 
-class MediasetShowIE(MediasetIE):
+class MediasetShowIE(MediasetIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'''(?x)
                     (?:
                         https?://
-                            (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
+                            (\w+\.)+mediaset\.it/
                             (?:
                                 (?:fiction|programmi-tv|serie-tv|kids)/(?:.+?/)?
                                     (?:[a-z-]+)_SE(?P<id>\d{12})