]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/vrt.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / vrt.py
index 00583571221c556c8da30e3b16b6ed5d22574eea..33ff574750056cfe1aa12407ee4e852ad1c7fcf9 100644 (file)
@@ -1,10 +1,10 @@
 import functools
 import json
 import time
-import urllib.error
 import urllib.parse
 
 from .gigya import GigyaBaseIE
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     clean_html,
@@ -16,6 +16,7 @@
     join_nonempty,
     jwt_encode_hs256,
     make_archive_id,
+    merge_dicts,
     parse_age_limit,
     parse_iso8601,
     str_or_none,
@@ -37,12 +38,12 @@ class VRTBaseIE(GigyaBaseIE):
         'device': 'undefined (undefined)',
         'os': {
             'name': 'Windows',
-            'version': 'x86_64'
+            'version': 'x86_64',
         },
         'player': {
             'name': 'VRT web player',
-            'version': '2.7.4-prod-2023-04-19T06:05:45'
-        }
+            'version': '2.7.4-prod-2023-04-19T06:05:45',
+        },
     }
     # From https://player.vrt.be/vrtnws/js/main.js & https://player.vrt.be/ketnet/js/main.8cdb11341bcb79e4cd44.js
     _JWT_KEY_ID = '0-0Fp51UZykfaiCJrfTE3+oMI8zvDteYfPtR+2n1R+z8w='
@@ -97,8 +98,8 @@ def _call_api(self, video_id, client='null', id_token=None, version='v2'):
             }, data=json.dumps({
                 'identityToken': id_token or {},
                 'playerInfo': jwt_encode_hs256(player_info, self._JWT_SIGNING_KEY, headers={
-                    'kid': self._JWT_KEY_ID
-                }).decode()
+                    'kid': self._JWT_KEY_ID,
+                }).decode(),
             }, separators=(',', ':')).encode())['vrtPlayerToken']
 
         return self._download_json(
@@ -263,7 +264,7 @@ def _perform_login(self, username, password):
                         '_csrf': self._get_cookies('https://login.vrt.be').get('OIDCXSRF').value,
                     }))
             except ExtractorError as e:
-                if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
+                if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                     retry.error = e
                     continue
                 raise
@@ -364,7 +365,7 @@ def _real_extract(self, url):
     subtitleVideodetail
     titleVideodetail
   }
-}''' % display_id,
+}''' % display_id,  # noqa: UP031
             })['data']['video']
 
         video_id = urllib.parse.unquote(video['mediaReference'])
@@ -425,3 +426,64 @@ def _real_extract(self, url):
                 ['description', 'twitter:description', 'og:description'], webpage),
             '_old_archive_ids': [make_archive_id('Canvas', video_id)],
         }
+
+
+class Radio1BeIE(VRTBaseIE):
+    _VALID_URL = r'https?://radio1\.be/(?:lees|luister/select)/(?P<id>[\w/-]+)'
+    _TESTS = [{
+        'url': 'https://radio1.be/luister/select/de-ochtend/komt-n-va-volgend-jaar-op-in-wallonie',
+        'info_dict': {
+            'id': 'eb6c22e9-544f-44f4-af39-cf8cccd29e22',
+            'title': 'Komt N-VA volgend jaar op in WalloniĆ«?',
+            'display_id': 'de-ochtend/komt-n-va-volgend-jaar-op-in-wallonie',
+            'description': 'md5:b374ea1c9302f38362df9dea1931468e',
+            'thumbnail': r're:https?://cds\.vrt\.radio/[^/#\?&]+',
+        },
+        'playlist_mincount': 1,
+    }, {
+        'url': 'https://radio1.be/lees/europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza?view=web',
+        'info_dict': {
+            'id': '5d47f102-dbdb-4fa0-832b-26c1870311f2',
+            'title': 'Europese Unie wil "onmiddellijke humanitaire pauze" en "duurzaam staakt-het-vuren" in Gaza',
+            'description': 'md5:1aad1fae7d39edeffde5d3e67d276b64',
+            'thumbnail': r're:https?://cds\.vrt\.radio/[^/#\?&]+',
+            'display_id': 'europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza',
+        },
+        'playlist_mincount': 1,
+    }]
+
+    def _extract_video_entries(self, next_js_data, display_id):
+        video_data = traverse_obj(
+            next_js_data, ((None, ('paragraphs', ...)), {lambda x: x if x['mediaReference'] else None}))
+        for data in video_data:
+            media_reference = data['mediaReference']
+            formats, subtitles = self._extract_formats_and_subtitles(
+                self._call_api(media_reference), display_id)
+
+            yield {
+                'id': media_reference,
+                'formats': formats,
+                'subtitles': subtitles,
+                **traverse_obj(data, {
+                    'title': ('title', {str}),
+                    'description': ('body', {clean_html}),
+                }),
+            }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        next_js_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['item']
+
+        return self.playlist_result(
+            self._extract_video_entries(next_js_data, display_id), **merge_dicts(traverse_obj(
+                next_js_data, ({
+                    'id': ('id', {str}),
+                    'title': ('title', {str}),
+                    'description': (('description', 'content'), {clean_html}),
+                }), get_all=False), {
+                    'display_id': display_id,
+                    'title': self._html_search_meta(['name', 'og:title', 'twitter:title'], webpage),
+                    'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
+                    'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
+            }))