jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/rai.py
[extractor] Deprecate `_sort_formats`
[yt-dlp.git] / yt_dlp / extractor / rai.py
index 2ce1b1a5c46c0cb57a35e2be2c69dd6e9e7e24ea..cab12cc214a0b64788cb4aeb9fc7b3e4d6056292 100644 (file)
@@ -51,6 +51,9 @@ def _extract_relinker_info(self, relinker_url, video_id, audio_only=False):
                 query={'output': 45, 'pl': platform},
                 headers=self.geo_verification_headers())
 
+            if xpath_text(relinker, './license_url', default='{}') != '{}':
+                self.report_drm(video_id)
+
             if not geoprotection:
                 geoprotection = xpath_text(
                     relinker, './geoprotection', default=None) == 'Y'
@@ -153,7 +156,7 @@ def get_format_info(tbr):
             br = int_or_none(tbr)
             if len(fmts) == 1 and not br:
                 br = fmts[0].get('tbr')
-            if br or 0 > 300:
+            if br and br > 300:
                 tbr = compat_str(math.floor(br / 100) * 100)
             else:
                 tbr = '250'
@@ -251,6 +254,8 @@ class RaiPlayIE(RaiBaseIE):
             },
             'release_year': 2022,
             'episode': 'Espresso nel caffè - 07/04/2014',
+            'timestamp': 1396919880,
+            'upload_date': '20140408',
         },
         'params': {
             'skip_download': True,
@@ -274,6 +279,8 @@ class RaiPlayIE(RaiBaseIE):
             'release_year': 2021,
             'season_number': 1,
             'episode': 'Senza occhi',
+            'timestamp': 1637318940,
+            'upload_date': '20211119',
         },
     }, {
         'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
@@ -284,7 +291,7 @@ class RaiPlayIE(RaiBaseIE):
         'only_matching': True,
     }, {
         # DRM protected
-        'url': 'https://www.raiplay.it/video/2020/09/Lo-straordinario-mondo-di-Zoey-S1E1-Lo-straordinario-potere-di-Zoey-ed493918-1d32-44b7-8454-862e473d00ff.html',
+        'url': 'https://www.raiplay.it/video/2021/06/Lo-straordinario-mondo-di-Zoey-S2E1-Lo-straordinario-ritorno-di-Zoey-3ba992de-2332-41ad-9214-73e32ab209f4.html',
         'only_matching': True,
     }]
 
@@ -306,7 +313,6 @@ def _real_extract(self, url):
         video = media['video']
 
         relinker_info = self._extract_relinker_info(video['content_url'], video_id)
-        self._sort_formats(relinker_info['formats'])
 
         thumbnails = []
         for _, value in media.get('images', {}).items():
@@ -349,7 +355,7 @@ def _real_extract(self, url):
         }
 
 
-class RaiPlayLiveIE(RaiPlayIE):
+class RaiPlayLiveIE(RaiPlayIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
     _TESTS = [{
         'url': 'http://www.raiplay.it/dirette/rainews24',
@@ -363,6 +369,8 @@ class RaiPlayLiveIE(RaiPlayIE):
             'creator': 'Rai News 24',
             'is_live': True,
             'live_status': 'is_live',
+            'upload_date': '20090502',
+            'timestamp': 1241276220,
         },
         'params': {
             'skip_download': True,
@@ -448,6 +456,8 @@ class RaiPlaySoundIE(RaiBaseIE):
             'series': 'Il Ruggito del Coniglio',
             'episode': 'Il Ruggito del Coniglio del 10/12/2021',
             'creator': 'rai radio 2',
+            'timestamp': 1638346620,
+            'upload_date': '20211201',
         },
         'params': {
             'skip_download': True,
@@ -493,7 +503,7 @@ def _real_extract(self, url):
         }
 
 
-class RaiPlaySoundLiveIE(RaiPlaySoundIE):
+class RaiPlaySoundLiveIE(RaiPlaySoundIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?P<id>[^/?#&]+)$)'
     _TESTS = [{
         'url': 'https://www.raiplaysound.it/radio2',
@@ -610,8 +620,6 @@ def _extract_from_content_id(self, content_id, url):
         else:
             raise ExtractorError('not a media file')
 
-        self._sort_formats(relinker_info['formats'])
-
         thumbnails = []
         for image_type in ('image', 'image_medium', 'image_300'):
             thumbnail_url = media.get(image_type)
@@ -692,7 +700,6 @@ def _real_extract(self, url):
 
         relinker_info = self._extract_relinker_info(
             urljoin(url, relinker_url), video_id)
-        self._sort_formats(relinker_info['formats'])
 
         title = self._search_regex(
             r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
@@ -706,8 +713,9 @@ def _real_extract(self, url):
         }
 
 
-class RaiNewsIE(RaiIE):
-    _VALID_URL = rf'https?://(www\.)?rainews\.it/[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
+class RaiNewsIE(RaiIE):  # XXX: Do not subclass from concrete IE
+    _VALID_URL = rf'https?://(www\.)?rainews\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
+    _EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)']
     _TESTS = [{
         # new rainews player (#3911)
         'url': 'https://www.rainews.it/rubriche/24mm/video/2022/05/24mm-del-29052022-12cf645d-1ffd-4220-b27c-07c226dbdecf.html',
@@ -732,6 +740,10 @@ class RaiNewsIE(RaiIE):
             'upload_date': '20161103'
         },
         'expected_warnings': ['unable to extract player_data'],
+    }, {
+        # iframe + drm
+        'url': 'https://www.rainews.it/iframe/video/2022/07/euro2022-europei-calcio-femminile-italia-belgio-gol-0-1-video-4de06a69-de75-4e32-a657-02f0885f8118.html',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -755,7 +767,6 @@ def _real_extract(self, url):
                 raise ExtractorError('Relinker URL not found', cause=e)
 
         relinker_info = self._extract_relinker_info(urljoin(url, relinker_url), video_id)
-        self._sort_formats(relinker_info['formats'])
 
         return {
             'id': video_id,
@@ -764,3 +775,46 @@ def _real_extract(self, url):
             'uploader': strip_or_none(track_info.get('editor') or None),
             **relinker_info
         }
+
+
+class RaiSudtirolIE(RaiBaseIE):
+    _VALID_URL = r'https?://raisudtirol\.rai\.it/.+?media=(?P<id>[TP]tv\d+)'
+    _TESTS = [{
+        'url': 'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460',
+        'info_dict': {
+            'id': 'Ptv1619729460',
+            'ext': 'mp4',
+            'title': 'Euro: trasmisciun d\'economia - 29-04-2021 20:51',
+            'series': 'Euro: trasmisciun d\'economia',
+            'upload_date': '20210429',
+            'thumbnail': r're:https://raisudtirol\.rai\.it/img/.+?\.jpg',
+            'uploader': 'raisudtirol',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        video_date = self._html_search_regex(r'<span class="med_data">(.+?)</span>', webpage, 'video_date', fatal=False)
+        video_title = self._html_search_regex(r'<span class="med_title">(.+?)</span>', webpage, 'video_title', fatal=False)
+        video_url = self._html_search_regex(r'sources:\s*\[\{file:\s*"(.+?)"\}\]', webpage, 'video_url')
+        video_thumb = self._html_search_regex(r'image: \'(.+?)\'', webpage, 'video_thumb', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': join_nonempty(video_title, video_date, delim=' - '),
+            'series': video_title,
+            'upload_date': unified_strdate(video_date),
+            'thumbnail': urljoin('https://raisudtirol.rai.it/', video_thumb),
+            'uploader': 'raisudtirol',
+            'formats': [{
+                'format_id': 'https-mp4',
+                'url': self._proto_relative_url(video_url),
+                'width': 1024,
+                'height': 576,
+                'fps': 25,
+                'vcodec': 'h264',
+                'acodec': 'aac',
+            }],
+        }