]>
Commit | Line | Data |
---|---|---|
6493f5d7 | 1 | from .common import InfoExtractor |
59b8ab58 PH |
2 | from ..utils import ( |
3 | int_or_none, | |
4 | parse_duration, | |
5 | ) | |
6493f5d7 JMF |
6 | |
7 | ||
59b8ab58 PH |
class RtlNlIE(InfoExtractor):
    """Extractor for rtl.nl and rtlxl.nl videos, including embedded players.

    The video UUID is taken from the URL and used to query the s4m JSON
    endpoint, which supplies the title, synopsis, HLS manifest path and
    thumbnail base URLs.
    """
    IE_NAME = 'rtl.nl'
    IE_DESC = 'rtl.nl and rtlxl.nl'
    # Matches <iframe> embeds of the rtl.nl player inside third-party pages
    _EMBED_REGEX = [r'<iframe[^>]+?\bsrc=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)(?P=q1)']
    _VALID_URL = r'''(?x)
        https?://(?:(?:www|static)\.)?
        (?:
            rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/|
            rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)|
            embed\.rtl\.nl/\#uuid=
        )
        (?P<id>[0-9a-f-]+)'''

    _TESTS = [{
        # new URL schema
        'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f',
        'md5': '490428f1187b60d714f34e1f2e3af0b6',
        'info_dict': {
            'id': '0bd1384d-d970-3086-98bb-5c104e10c26f',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1593293400,
            'upload_date': '20200627',
            'duration': 661.08,
        },
    }, {
        # old URL schema
        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
        'md5': '473d1946c1fdd050b2c0161a4b13c373',
        'info_dict': {
            'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1461951000,
            'upload_date': '20160429',
            'duration': 1167.96,
        },
        'skip': '404',
    }, {
        # best format available a3t
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'md5': 'dea7474214af1271d91ef332fb8be7ea',
        'info_dict': {
            'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
            'ext': 'mp4',
            'timestamp': 1424039400,
            'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
            'upload_date': '20150215',
            'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
        }
    }, {
        # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275)
        # best format available nettv
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
        'info_dict': {
            'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
            'ext': 'mp4',
            'title': 'RTL Nieuws - Meer beelden van overval juwelier',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
            'timestamp': 1437233400,
            'upload_date': '20150718',
            'duration': 30.474,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # encrypted m3u8 streams, georestricted
        'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
        'only_matching': True,
    }, {
        'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
        'only_matching': True,
    }, {
        'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
        'only_matching': True,
    }, {
        'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
        'only_matching': True,
    }, {
        'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
        'only_matching': True,
    }, {
        # new embed URL schema
        'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video identified by the UUID in *url*.

        Raises KeyError/IndexError when the JSON response lacks the entries
        needed for the title or the manifest path (``material``,
        ``abstracts``, ``videopath``). The optional fields (synopsis,
        duration, timestamp, thumbnails) are allowed to be absent.
        """
        uuid = self._match_id(url)
        info = self._download_json(
            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid,
            uuid)

        material = info['material'][0]
        title = info['abstracts'][0]['name']
        subtitle = material.get('title')
        if subtitle:
            title += ' - %s' % subtitle
        description = material.get('synopsis')

        # `or {}` also covers an explicit JSON null, not only a missing key
        meta = info.get('meta') or {}

        videopath = material['videopath']
        # Fall back to the default manifest host when none (or an empty one) is given
        m3u8_url = (meta.get('videohost') or 'http://manifest.us.rtl.nl') + videopath

        formats = self._extract_m3u8_formats(
            m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)

        thumbnails = []

        # NOTE(review): the second key really contains literal double quotes;
        # presumably it mirrors the key as served by the API - confirm before changing.
        for p in ('poster_base_url', '"thumb_base_url"'):
            base_url = meta.get(p)
            if not base_url:
                continue

            # The dimensions are read from the `/sz=<width>x<height>` part of the base URL
            thumbnails.append({
                'url': self._proto_relative_url(base_url + uuid),
                'width': int_or_none(self._search_regex(
                    r'/sz=([0-9]+)', base_url, 'thumbnail width', fatal=False)),
                'height': int_or_none(self._search_regex(
                    r'/sz=[0-9]+x([0-9]+)',
                    base_url, 'thumbnail height', fatal=False)),
            })

        return {
            'id': uuid,
            'title': title,
            'formats': formats,
            # Optional metadata: a missing date must not abort the extraction
            'timestamp': int_or_none(material.get('original_date')),
            'description': description,
            'duration': parse_duration(material.get('duration')),
            'thumbnails': thumbnails,
        }
5f2da312 H |
144 | |
145 | ||
class RTLLuBaseIE(InfoExtractor):
    """Common extraction logic shared by the rtl.lu extractors.

    Media URLs are scraped from the ``<rtl-player>`` and
    ``<rtl-audioplayer>`` tags of the downloaded page.
    """
    # Per media type: regex whose first group captures the URL attribute value
    _MEDIA_REGEX = {
        'video': r'<rtl-player\s[^>]*\bhls\s*=\s*"([^"]+)',
        'audio': r'<rtl-audioplayer\s[^>]*\bsrc\s*=\s*"([^"]+)',
        'thumbnail': r'<rtl-player\s[^>]*\bposter\s*=\s*"([^"]+)',
    }

    def get_media_url(self, webpage, video_id, media_type):
        """Return the URL for *media_type* found in *webpage*, or None if absent."""
        pattern = self._MEDIA_REGEX[media_type]
        return self._search_regex(
            pattern, webpage, f'{media_type} url', default=None)

    def get_formats_and_subtitles(self, webpage, video_id):
        """Return ``(formats, subtitles)`` built from the page's video and audio URLs."""
        hls_url = self.get_media_url(webpage, video_id, 'video')
        mp3_url = self.get_media_url(webpage, video_id, 'audio')

        if hls_url is None:
            formats, subtitles = [], {}
        else:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, video_id)

        # An audio player contributes one extra audio-only format
        if mp3_url is not None:
            formats.append({'url': mp3_url, 'ext': 'mp3', 'vcodec': 'none'})

        return formats, subtitles

    def _real_extract(self, url):
        """Download the page at *url* and assemble the info dict from it."""
        video_id = self._match_id(url)
        # The live pages are recognised purely by their fixed IDs
        is_live = video_id in ('live', 'live-2', 'lauschteren')

        # TODO: extract comments from https://www.rtl.lu/comments?status=1&order=desc&context=news|article|<video_id>
        # the context can be taken from <rtl-comments context=<context> in the webpage
        webpage = self._download_webpage(url, video_id)

        formats, subtitles = self.get_formats_and_subtitles(webpage, video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage, default=None)
        # Prefer the player's poster; fall back to the OpenGraph thumbnail
        thumbnail = self.get_media_url(webpage, video_id, 'thumbnail')
        if not thumbnail:
            thumbnail = self._og_search_thumbnail(webpage, default=None)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': thumbnail,
            'is_live': is_live,
        }
186 | ||
187 | ||
class RTLLuTeleVODIE(RTLLuBaseIE):
    """rtl.lu video-on-demand pages (``/tele/<slug>/v/<id>`` and ``/video/<id>``)."""
    IE_NAME = 'rtl.lu:tele-vod'
    _VALID_URL = r'https?://(?:www\.)?rtl\.lu/(tele/(?P<slug>[\w-]+)/v/|video/)(?P<id>\d+)(\.html)?'
    _TESTS = [
        {
            # /tele/<slug>/v/<id>.html form
            'url': 'https://www.rtl.lu/tele/de-journal-vun-der-tele/v/3266757.html',
            'info_dict': {
                'id': '3266757',
                'ext': 'mp4',
                'title': 'Informatiounsversammlung Héichwaasser',
                'description': 'md5:b1db974408cc858c9fd241812e4a2a14',
                'thumbnail': 'https://replay-assets.rtl.lu/2021/11/16/d3647fc4-470d-11ec-adc2-3a00abd6e90f_00008.jpg',
            },
        },
        {
            # short /video/<id> form
            'url': 'https://www.rtl.lu/video/3295215',
            'info_dict': {
                'id': '3295215',
                'ext': 'mp4',
                'title': 'Kulturassisen iwwer d\'Bestandsopnam vum Lëtzebuerger Konscht',
                'description': 'md5:85bcd4e0490aa6ec969d9bf16927437b',
                'thumbnail': 'https://replay-assets.rtl.lu/2022/06/28/0000_3295215_0000.jpg',
            },
        },
    ]
210 | ||
211 | ||
class RTLLuArticleIE(RTLLuBaseIE):
    """Article pages (``.../a/<id>.html``) on the www, 5minutes and today subdomains of rtl.lu."""
    IE_NAME = 'rtl.lu:article'
    _VALID_URL = r'https?://(?:(www|5minutes|today)\.)rtl\.lu/(?:[\w-]+)/(?:[\w-]+)/a/(?P<id>\d+)\.html'
    _TESTS = [
        {
            # Audio-only
            'url': 'https://www.rtl.lu/sport/news/a/1934360.html',
            'info_dict': {
                'id': '1934360',
                'ext': 'mp3',
                'title': 'md5:40aa85f135578fbd549d3c9370321f99',
                'description': 'md5:5eab4a2a911c1fff7efc1682a38f9ef7',
                'thumbnail': 'https://static.rtl.lu/rtl2008.lu/nt/p/2022/06/28/19/e4b37d66ddf00bab4c45617b91a5bb9b.jpeg',
            },
        },
        {
            # 5minutes
            'url': 'https://5minutes.rtl.lu/espace-frontaliers/frontaliers-en-questions/a/1853173.html',
            'info_dict': {
                'id': '1853173',
                'ext': 'mp4',
                'title': 'md5:87e17722ed21af0f24be3243f4ec0c46',
                'description': 'md5:ac031da0740e997a5cf4633173634fee',
                'thumbnail': 'https://replay-assets.rtl.lu/2022/01/26/screenshot_20220126104933_3274749_12b249833469b0d6e4440a1dec83cdfa.jpg',
            },
        },
        {
            # today.lu
            'url': 'https://today.rtl.lu/entertainment/news/a/1936203.html',
            'info_dict': {
                'id': '1936203',
                'ext': 'mp4',
                'title': 'Once Upon A Time...zu Lëtzebuerg: The Three Witches\' Tower',
                'description': 'The witchy theme continues in the latest episode of Once Upon A Time...',
                'thumbnail': 'https://replay-assets.rtl.lu/2022/07/02/screenshot_20220702122859_3290019_412dc5185951b7f6545a4039c8be9235.jpg',
            },
        },
    ]
246 | ||
247 | ||
class RTLLuLiveIE(RTLLuBaseIE):
    """Live pages under ``/tele/`` and ``/radio/`` on www.rtl.lu."""
    _VALID_URL = r'https?://www\.rtl\.lu/(?:tele|radio)/(?P<id>live(?:-\d+)?|lauschteren)'
    _TESTS = [
        {
            # Tele:live
            'url': 'https://www.rtl.lu/tele/live',
            'info_dict': {
                'id': 'live',
                'ext': 'mp4',
                'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
                'thumbnail': 'https://static.rtl.lu/livestream/channel1.jpg',
                'live_status': 'is_live',
            },
        },
        {
            # Tele:live-2
            'url': 'https://www.rtl.lu/tele/live-2',
            'info_dict': {
                'id': 'live-2',
                'ext': 'mp4',
                'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
                'thumbnail': 'https://static.rtl.lu/livestream/channel2.jpg',
                'live_status': 'is_live',
            },
        },
        {
            # Radio:lauschteren
            'url': 'https://www.rtl.lu/radio/lauschteren',
            'info_dict': {
                'id': 'lauschteren',
                'ext': 'mp4',
                'title': r're:RTL - Radio LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
                'thumbnail': 'https://static.rtl.lu/livestream/rtlradiowebtv.jpg',
                'live_status': 'is_live',
            },
        },
    ]
281 | ||
282 | ||
class RTLLuRadioIE(RTLLuBaseIE):
    """Radio show pages (``/radio/<show>/s/<id>``) on www.rtl.lu."""
    _VALID_URL = r'https?://www\.rtl\.lu/radio/(?:[\w-]+)/s/(?P<id>\d+)(\.html)?'
    _TESTS = [
        {
            'url': 'https://www.rtl.lu/radio/5-vir-12/s/4033058.html',
            'info_dict': {
                'id': '4033058',
                'ext': 'mp3',
                'title': '5 vir 12 - Stau um Stau',
                'description': 'md5:f855a4f3e3235393ae47ed1db5d934b9',
                'thumbnail': 'https://static.rtl.lu/rtlg//2022/06/24/c9c19e5694a14be46a3647a3760e1f62.jpg',
            },
        },
    ]