]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/rtlnl.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / rtlnl.py
CommitLineData
6493f5d7 1from .common import InfoExtractor
59b8ab58
PH
2from ..utils import (
3 int_or_none,
4 parse_duration,
5)
6493f5d7
JMF
6
7
59b8ab58
PH
class RtlNlIE(InfoExtractor):
    # Extractor for videos hosted on rtl.nl / rtlxl.nl, including the
    # embeddable player URLs matched by _EMBED_REGEX.
    IE_NAME = 'rtl.nl'
    IE_DESC = 'rtl.nl and rtlxl.nl'
    _EMBED_REGEX = [r'<iframe[^>]+?\bsrc=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)(?P=q1)']
    # The captured `id` group is the hexadecimal/dash UUID of the video.
    _VALID_URL = r'''(?x)
        https?://(?:(?:www|static)\.)?
        (?:
            rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/|
            rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)|
            embed\.rtl\.nl/\#uuid=
        )
        (?P<id>[0-9a-f-]+)'''

    _TESTS = [{
        # new URL schema
        'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f',
        'md5': '490428f1187b60d714f34e1f2e3af0b6',
        'info_dict': {
            'id': '0bd1384d-d970-3086-98bb-5c104e10c26f',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1593293400,
            'upload_date': '20200627',
            'duration': 661.08,
        },
    }, {
        # old URL schema
        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
        'md5': '473d1946c1fdd050b2c0161a4b13c373',
        'info_dict': {
            'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1461951000,
            'upload_date': '20160429',
            'duration': 1167.96,
        },
        'skip': '404',
    }, {
        # best format available a3t
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'md5': 'dea7474214af1271d91ef332fb8be7ea',
        'info_dict': {
            'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
            'ext': 'mp4',
            'timestamp': 1424039400,
            'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
            'upload_date': '20150215',
            'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
        },
    }, {
        # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275)
        # best format available nettv
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
        'info_dict': {
            'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
            'ext': 'mp4',
            'title': 'RTL Nieuws - Meer beelden van overval juwelier',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
            'timestamp': 1437233400,
            'upload_date': '20150718',
            'duration': 30.474,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # encrypted m3u8 streams, georestricted
        'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
        'only_matching': True,
    }, {
        'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
        'only_matching': True,
    }, {
        'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
        'only_matching': True,
    }, {
        'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
        'only_matching': True,
    }, {
        'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
        'only_matching': True,
    }, {
        # new embed URL schema
        'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video whose UUID is embedded in *url*.

        Downloads the JSON description of the video, derives the HLS manifest
        URL from it and collects the thumbnails advertised in its ``meta``
        section. ``material[0]``, its ``videopath`` and ``abstracts[0].name``
        are required; every other field is read leniently.
        """
        uuid = self._match_id(url)
        info = self._download_json(
            f'http://www.rtl.nl/system/s4m/vfd/version=2/uuid={uuid}/fmt=adaptive/',
            uuid)

        material = info['material'][0]
        title = info['abstracts'][0]['name']
        subtitle = material.get('title')
        if subtitle:
            title += f' - {subtitle}'

        # `meta` is optional; `or {}` also covers an explicit null value
        meta = info.get('meta') or {}

        # Fall back to the default manifest host when none is advertised
        videohost = meta.get('videohost') or 'http://manifest.us.rtl.nl'
        m3u8_url = videohost + material['videopath']

        formats = self._extract_m3u8_formats(
            m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)

        thumbnails = []
        # NOTE: the second key used to be spelled with literal double quotes
        # inside the string ('"thumb_base_url"'), which could never match a
        # plain `thumb_base_url` entry, so that thumbnail was always skipped.
        for key in ('poster_base_url', 'thumb_base_url'):
            base_url = meta.get(key)
            if not base_url:
                continue

            # The base URL is completed by appending the UUID; the size is
            # parsed from its `/sz=<width>x<height>` component when present.
            thumbnails.append({
                'url': self._proto_relative_url(base_url + uuid),
                'width': int_or_none(self._search_regex(
                    r'/sz=([0-9]+)', base_url, 'thumbnail width', fatal=False)),
                'height': int_or_none(self._search_regex(
                    r'/sz=[0-9]+x([0-9]+)',
                    base_url, 'thumbnail height', fatal=False)),
            })

        return {
            'id': uuid,
            'title': title,
            'formats': formats,
            # Optional metadata: a missing date must not abort the extraction
            'timestamp': int_or_none(material.get('original_date')),
            'description': material.get('synopsis'),
            'duration': parse_duration(material.get('duration')),
            'thumbnails': thumbnails,
        }
5f2da312
H
144
145
class RTLLuBaseIE(InfoExtractor):
    # Shared logic for the rtl.lu extractors: media URLs are scraped from the
    # attributes of the <rtl-player> / <rtl-audioplayer> elements in the page.
    _MEDIA_REGEX = {
        'video': r'<rtl-player\s[^>]*\bhls\s*=\s*"([^"]+)',
        'audio': r'<rtl-audioplayer\s[^>]*\bsrc\s*=\s*"([^"]+)',
        'thumbnail': r'<rtl-player\s[^>]*\bposter\s*=\s*"([^"]+)',
    }

    def get_media_url(self, webpage, video_id, media_type):
        """Return the URL of the given media type found in *webpage*, or None.

        *media_type* must be one of the keys of ``_MEDIA_REGEX``;
        *video_id* is accepted but not used by the lookup.
        """
        pattern = self._MEDIA_REGEX[media_type]
        return self._search_regex(pattern, webpage, f'{media_type} url', default=None)

    def get_formats_and_subtitles(self, webpage, video_id):
        """Return ``(formats, subtitles)`` for the media embedded in *webpage*.

        HLS formats come from the video player, if any; a direct mp3 format
        is appended when an audio player is present.
        """
        hls_url = self.get_media_url(webpage, video_id, 'video')
        mp3_url = self.get_media_url(webpage, video_id, 'audio')

        if hls_url is None:
            formats, subtitles = [], {}
        else:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, video_id)

        if mp3_url is not None:
            formats.append({'url': mp3_url, 'ext': 'mp3', 'vcodec': 'none'})

        return formats, subtitles

    def _real_extract(self, url):
        """Download the page behind *url* and assemble its info dict."""
        video_id = self._match_id(url)
        # These page identifiers are treated as live streams
        is_live = video_id in ('live', 'live-2', 'lauschteren')

        # TODO: extract comments from https://www.rtl.lu/comments?status=1&order=desc&context=news|article|<video_id>
        # (presumably the context can be read from the <rtl-comments context=<context> element in the webpage)
        webpage = self._download_webpage(url, video_id)

        formats, subtitles = self.get_formats_and_subtitles(webpage, video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage, default=None)
        # Prefer the player's poster image; fall back to the OpenGraph thumbnail
        thumbnail = (
            self.get_media_url(webpage, video_id, 'thumbnail')
            or self._og_search_thumbnail(webpage, default=None))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': thumbnail,
            'is_live': is_live,
        }
186
187
class RTLLuTeleVODIE(RTLLuBaseIE):
    # rtl.lu television VOD pages; extraction itself is inherited from the
    # base class. Matches both /tele/<slug>/v/<id>[.html] and /video/<id>.
    IE_NAME = 'rtl.lu:tele-vod'
    _VALID_URL = r'https?://(?:www\.)?rtl\.lu/(tele/(?P<slug>[\w-]+)/v/|video/)(?P<id>\d+)(\.html)?'
    _TESTS = [
        {
            'url': 'https://www.rtl.lu/tele/de-journal-vun-der-tele/v/3266757.html',
            'info_dict': {
                'id': '3266757',
                'ext': 'mp4',
                'title': 'Informatiounsversammlung Héichwaasser',
                'description': 'md5:b1db974408cc858c9fd241812e4a2a14',
                'thumbnail': 'https://replay-assets.rtl.lu/2021/11/16/d3647fc4-470d-11ec-adc2-3a00abd6e90f_00008.jpg',
            },
        },
        {
            'url': 'https://www.rtl.lu/video/3295215',
            'info_dict': {
                'id': '3295215',
                'ext': 'mp4',
                'title': 'Kulturassisen iwwer d\'Bestandsopnam vum Lëtzebuerger Konscht',
                'description': 'md5:85bcd4e0490aa6ec969d9bf16927437b',
                'thumbnail': 'https://replay-assets.rtl.lu/2022/06/28/0000_3295215_0000.jpg',
            },
        },
    ]
210
211
class RTLLuArticleIE(RTLLuBaseIE):
    # Article pages (/<section>/<subsection>/a/<id>.html) on the www,
    # 5minutes and today subdomains of rtl.lu; extraction is inherited.
    IE_NAME = 'rtl.lu:article'
    _VALID_URL = r'https?://(?:(www|5minutes|today)\.)rtl\.lu/(?:[\w-]+)/(?:[\w-]+)/a/(?P<id>\d+)\.html'
    _TESTS = [
        {
            # Audio-only
            'url': 'https://www.rtl.lu/sport/news/a/1934360.html',
            'info_dict': {
                'id': '1934360',
                'ext': 'mp3',
                'title': 'md5:40aa85f135578fbd549d3c9370321f99',
                'description': 'md5:5eab4a2a911c1fff7efc1682a38f9ef7',
                'thumbnail': 'https://static.rtl.lu/rtl2008.lu/nt/p/2022/06/28/19/e4b37d66ddf00bab4c45617b91a5bb9b.jpeg',
            },
        },
        {
            # 5minutes
            'url': 'https://5minutes.rtl.lu/espace-frontaliers/frontaliers-en-questions/a/1853173.html',
            'info_dict': {
                'id': '1853173',
                'ext': 'mp4',
                'title': 'md5:87e17722ed21af0f24be3243f4ec0c46',
                'description': 'md5:ac031da0740e997a5cf4633173634fee',
                'thumbnail': 'https://replay-assets.rtl.lu/2022/01/26/screenshot_20220126104933_3274749_12b249833469b0d6e4440a1dec83cdfa.jpg',
            },
        },
        {
            # today.lu
            'url': 'https://today.rtl.lu/entertainment/news/a/1936203.html',
            'info_dict': {
                'id': '1936203',
                'ext': 'mp4',
                'title': 'Once Upon A Time...zu Lëtzebuerg: The Three Witches\' Tower',
                'description': 'The witchy theme continues in the latest episode of Once Upon A Time...',
                'thumbnail': 'https://replay-assets.rtl.lu/2022/07/02/screenshot_20220702122859_3290019_412dc5185951b7f6545a4039c8be9235.jpg',
            },
        },
    ]
246
247
class RTLLuLiveIE(RTLLuBaseIE):
    # Live TV / radio pages on www.rtl.lu. The id is the last path segment:
    # "live", "live-<n>" or "lauschteren". Extraction is inherited.
    _VALID_URL = r'https?://www\.rtl\.lu/(?:tele|radio)/(?P<id>live(?:-\d+)?|lauschteren)'
    _TESTS = [
        {
            # Tele:live
            'url': 'https://www.rtl.lu/tele/live',
            'info_dict': {
                'id': 'live',
                'ext': 'mp4',
                'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
                'thumbnail': 'https://static.rtl.lu/livestream/channel1.jpg',
                'live_status': 'is_live',
            },
        },
        {
            # Tele:live-2
            'url': 'https://www.rtl.lu/tele/live-2',
            'info_dict': {
                'id': 'live-2',
                'ext': 'mp4',
                'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
                'thumbnail': 'https://static.rtl.lu/livestream/channel2.jpg',
                'live_status': 'is_live',
            },
        },
        {
            # Radio:lauschteren
            'url': 'https://www.rtl.lu/radio/lauschteren',
            'info_dict': {
                'id': 'lauschteren',
                'ext': 'mp4',
                'title': r're:RTL - Radio LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
                'thumbnail': 'https://static.rtl.lu/livestream/rtlradiowebtv.jpg',
                'live_status': 'is_live',
            },
        },
    ]
281
282
class RTLLuRadioIE(RTLLuBaseIE):
    # Radio show pages on www.rtl.lu (/radio/<show>/s/<id>[.html]);
    # extraction is inherited from the base class.
    _VALID_URL = r'https?://www\.rtl\.lu/radio/(?:[\w-]+)/s/(?P<id>\d+)(\.html)?'
    _TESTS = [
        {
            'url': 'https://www.rtl.lu/radio/5-vir-12/s/4033058.html',
            'info_dict': {
                'id': '4033058',
                'ext': 'mp3',
                'title': '5 vir 12 - Stau um Stau',
                'description': 'md5:f855a4f3e3235393ae47ed1db5d934b9',
                'thumbnail': 'https://static.rtl.lu/rtlg//2022/06/24/c9c19e5694a14be46a3647a3760e1f62.jpg',
            },
        },
    ]