]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/rtlnl.py
[extractor/youtube] Fix `live_status` extraction for playlist videos
[yt-dlp.git] / yt_dlp / extractor / rtlnl.py
CommitLineData
6493f5d7 1from .common import InfoExtractor
59b8ab58
PH
2from ..utils import (
3 int_or_none,
4 parse_duration,
5)
6493f5d7
JMF
6
7
59b8ab58
PH
class RtlNlIE(InfoExtractor):
    # Extractor for rtl.nl / rtlxl.nl video pages and their embedded players.
    IE_NAME = 'rtl.nl'
    IE_DESC = 'rtl.nl and rtlxl.nl'
    _EMBED_REGEX = [r'<iframe[^>]+?\bsrc=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)(?P=q1)']
    _VALID_URL = r'''(?x)
        https?://(?:(?:www|static)\.)?
        (?:
            rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/|
            rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)|
            embed\.rtl\.nl/\#uuid=
        )
        (?P<id>[0-9a-f-]+)'''

    _TESTS = [{
        # new URL schema
        'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f',
        'md5': '490428f1187b60d714f34e1f2e3af0b6',
        'info_dict': {
            'id': '0bd1384d-d970-3086-98bb-5c104e10c26f',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1593293400,
            'upload_date': '20200627',
            'duration': 661.08,
        },
    }, {
        # old URL schema
        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
        'md5': '473d1946c1fdd050b2c0161a4b13c373',
        'info_dict': {
            'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1461951000,
            'upload_date': '20160429',
            'duration': 1167.96,
        },
        'skip': '404',
    }, {
        # best format available a3t
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'md5': 'dea7474214af1271d91ef332fb8be7ea',
        'info_dict': {
            'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
            'ext': 'mp4',
            'timestamp': 1424039400,
            'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
            'upload_date': '20150215',
            'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
        },
    }, {
        # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275)
        # best format available nettv
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
        'info_dict': {
            'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
            'ext': 'mp4',
            'title': 'RTL Nieuws - Meer beelden van overval juwelier',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
            'timestamp': 1437233400,
            'upload_date': '20150718',
            'duration': 30.474,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # encrypted m3u8 streams, georestricted
        'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
        'only_matching': True,
    }, {
        'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
        'only_matching': True,
    }, {
        'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
        'only_matching': True,
    }, {
        'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
        'only_matching': True,
    }, {
        'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
        'only_matching': True,
    }, {
        # new embed URL schema
        'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'only_matching': True,
    }]

    def _thumbnail_from_base(self, base_url, video_id):
        """Build one thumbnail entry from a metadata base URL and the video id.

        The URL is the base with the id appended; width and height are read
        from a ``/sz=<width>x<height>`` fragment of the base URL (both lookups
        are non-fatal).
        """
        return {
            'url': self._proto_relative_url(base_url + video_id),
            'width': int_or_none(self._search_regex(
                r'/sz=([0-9]+)', base_url, 'thumbnail width', fatal=False)),
            'height': int_or_none(self._search_regex(
                r'/sz=[0-9]+x([0-9]+)', base_url, 'thumbnail height', fatal=False)),
        }

    def _real_extract(self, url):
        """Download the JSON metadata for the id in *url* and build the info dict."""
        video_id = self._match_id(url)
        api_url = 'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % video_id
        data = self._download_json(api_url, video_id)

        # Only the first material entry and the first abstract are used.
        entry = data['material'][0]
        title = data['abstracts'][0]['name']
        episode_title = entry.get('title')
        if episode_title:
            title += ' - %s' % episode_title
        description = entry.get('synopsis')

        meta = data.get('meta', {})

        # The manifest URL is the (optional) host from the metadata plus the
        # mandatory video path of the material entry.
        videopath = entry['videopath']
        manifest_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath

        formats = self._extract_m3u8_formats(
            manifest_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
        self._sort_formats(formats)

        # NOTE(review): the second key literally contains double quotes;
        # confirm that this is how the API spells it.
        thumbnails = [
            self._thumbnail_from_base(meta[key], video_id)
            for key in ('poster_base_url', '"thumb_base_url"')
            if meta.get(key)
        ]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'timestamp': entry['original_date'],
            'description': description,
            'duration': parse_duration(entry.get('duration')),
            'thumbnails': thumbnails,
        }
5f2da312
H
145
146
class RTLLuBaseIE(InfoExtractor):
    # Shared implementation for the rtl.lu extractors: media URLs are scraped
    # from <rtl-player> / <rtl-audioplayer> tags of the downloaded page.
    _MEDIA_REGEX = {
        'video': r'<rtl-player\s[^>]*\bhls\s*=\s*"([^"]+)',
        'audio': r'<rtl-audioplayer\s[^>]*\bsrc\s*=\s*"([^"]+)',
        'thumbnail': r'<rtl-player\s[^>]*\bposter\s*=\s*"([^"]+)',
    }

    @staticmethod
    def _is_live_id(video_id):
        """Return True when *video_id* has the shape of a live-stream id.

        Accepts 'live', 'live-<digits>' and 'lauschteren', i.e. the same ids
        that RTLLuLiveIE._VALID_URL captures, so that any URL handled by the
        live extractor is reported as live (not only 'live' and 'live-2').
        """
        if video_id in ('live', 'lauschteren'):
            return True
        prefix, dash, number = video_id.partition('-')
        return prefix == 'live' and dash == '-' and number.isdecimal()

    def get_media_url(self, webpage, video_id, media_type):
        """Search *webpage* with the pattern registered for *media_type*.

        *media_type* must be a key of _MEDIA_REGEX ('video', 'audio' or
        'thumbnail'). The search is done with default=None, so a missing
        player tag is not an error. *video_id* is accepted but not used.
        """
        return self._search_regex(self._MEDIA_REGEX[media_type], webpage, f'{media_type} url', default=None)

    def get_formats_and_subtitles(self, webpage, video_id):
        """Collect formats and subtitles from the players found in *webpage*.

        Returns a ``(formats, subtitles)`` tuple. The HLS manifest (if any) is
        expanded first; a direct audio URL (if any) is appended as an
        audio-only mp3 format.
        """
        video_url, audio_url = self.get_media_url(webpage, video_id, 'video'), self.get_media_url(webpage, video_id, 'audio')

        formats, subtitles = [], {}
        if video_url is not None:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id)
        if audio_url is not None:
            formats.append({'url': audio_url, 'ext': 'mp3', 'vcodec': 'none'})

        return formats, subtitles

    def _real_extract(self, url):
        """Download the page behind *url* and build the info dict from it."""
        video_id = self._match_id(url)
        is_live = self._is_live_id(video_id)

        # TODO: extract comments from https://www.rtl.lu/comments?status=1&order=desc&context=news|article|<video_id>
        # we can get the context from <rtl-comments context=<context> in the webpage
        webpage = self._download_webpage(url, video_id)

        formats, subtitles = self.get_formats_and_subtitles(webpage, video_id)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage, default=None),
            'formats': formats,
            'subtitles': subtitles,
            # Prefer the player's poster; fall back to the OpenGraph thumbnail.
            'thumbnail': self.get_media_url(webpage, video_id, 'thumbnail') or self._og_search_thumbnail(webpage, default=None),
            'is_live': is_live,
        }
188
189
class RTLLuTeleVODIE(RTLLuBaseIE):
    # Matches rtl.lu video pages under /tele/<slug>/v/<id> and /video/<id>;
    # extraction itself is inherited from the base class.
    IE_NAME = 'rtl.lu:tele-vod'
    _VALID_URL = r'https?://(?:www\.)?rtl\.lu/(tele/(?P<slug>[\w-]+)/v/|video/)(?P<id>\d+)(\.html)?'
    _TESTS = [
        {
            'url': 'https://www.rtl.lu/tele/de-journal-vun-der-tele/v/3266757.html',
            'info_dict': {
                'id': '3266757',
                'title': 'Informatiounsversammlung Héichwaasser',
                'ext': 'mp4',
                'thumbnail': 'https://replay-assets.rtl.lu/2021/11/16/d3647fc4-470d-11ec-adc2-3a00abd6e90f_00008.jpg',
                'description': 'md5:b1db974408cc858c9fd241812e4a2a14',
            },
        },
        {
            'url': 'https://www.rtl.lu/video/3295215',
            'info_dict': {
                'id': '3295215',
                'title': 'Kulturassisen iwwer d\'Bestandsopnam vum Lëtzebuerger Konscht',
                'ext': 'mp4',
                'thumbnail': 'https://replay-assets.rtl.lu/2022/06/28/0000_3295215_0000.jpg',
                'description': 'md5:85bcd4e0490aa6ec969d9bf16927437b',
            },
        },
    ]
212
213
class RTLLuArticleIE(RTLLuBaseIE):
    # Matches article pages (.../a/<id>.html) on the www, 5minutes and today
    # subdomains of rtl.lu; extraction itself is inherited from the base class.
    IE_NAME = 'rtl.lu:article'
    _VALID_URL = r'https?://(?:(www|5minutes|today)\.)rtl\.lu/(?:[\w-]+)/(?:[\w-]+)/a/(?P<id>\d+)\.html'
    _TESTS = [
        {
            # Audio-only
            'url': 'https://www.rtl.lu/sport/news/a/1934360.html',
            'info_dict': {
                'id': '1934360',
                'ext': 'mp3',
                'thumbnail': 'https://static.rtl.lu/rtl2008.lu/nt/p/2022/06/28/19/e4b37d66ddf00bab4c45617b91a5bb9b.jpeg',
                'description': 'md5:5eab4a2a911c1fff7efc1682a38f9ef7',
                'title': 'md5:40aa85f135578fbd549d3c9370321f99',
            },
        },
        {
            # 5minutes
            'url': 'https://5minutes.rtl.lu/espace-frontaliers/frontaliers-en-questions/a/1853173.html',
            'info_dict': {
                'id': '1853173',
                'ext': 'mp4',
                'description': 'md5:ac031da0740e997a5cf4633173634fee',
                'title': 'md5:87e17722ed21af0f24be3243f4ec0c46',
                'thumbnail': 'https://replay-assets.rtl.lu/2022/01/26/screenshot_20220126104933_3274749_12b249833469b0d6e4440a1dec83cdfa.jpg',
            },
        },
        {
            # today.lu
            'url': 'https://today.rtl.lu/entertainment/news/a/1936203.html',
            'info_dict': {
                'id': '1936203',
                'ext': 'mp4',
                'title': 'Once Upon A Time...zu Lëtzebuerg: The Three Witches\' Tower',
                'description': 'The witchy theme continues in the latest episode of Once Upon A Time...',
                'thumbnail': 'https://replay-assets.rtl.lu/2022/07/02/screenshot_20220702122859_3290019_412dc5185951b7f6545a4039c8be9235.jpg',
            },
        },
    ]
248
249
class RTLLuLiveIE(RTLLuBaseIE):
    # Matches the live pages under /tele/ and /radio/ on www.rtl.lu; the id is
    # the last path component ('live', 'live-<n>' or 'lauschteren').
    _VALID_URL = r'https?://www\.rtl\.lu/(?:tele|radio)/(?P<id>live(?:-\d+)?|lauschteren)'
    _TESTS = [
        {
            # Tele:live
            'url': 'https://www.rtl.lu/tele/live',
            'info_dict': {
                'id': 'live',
                'ext': 'mp4',
                'live_status': 'is_live',
                'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
                'thumbnail': 'https://static.rtl.lu/livestream/channel1.jpg',
            },
        },
        {
            # Tele:live-2
            'url': 'https://www.rtl.lu/tele/live-2',
            'info_dict': {
                'id': 'live-2',
                'ext': 'mp4',
                'live_status': 'is_live',
                'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
                'thumbnail': 'https://static.rtl.lu/livestream/channel2.jpg',
            },
        },
        {
            # Radio:lauschteren
            'url': 'https://www.rtl.lu/radio/lauschteren',
            'info_dict': {
                'id': 'lauschteren',
                'ext': 'mp4',
                'live_status': 'is_live',
                'title': r're:RTL - Radio LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
                'thumbnail': 'https://static.rtl.lu/livestream/rtlradiowebtv.jpg',
            },
        },
    ]
283
284
class RTLLuRadioIE(RTLLuBaseIE):
    # Matches radio show pages (/radio/<show>/s/<id>) on www.rtl.lu;
    # extraction itself is inherited from the base class.
    _VALID_URL = r'https?://www\.rtl\.lu/radio/(?:[\w-]+)/s/(?P<id>\d+)(\.html)?'
    _TESTS = [
        {
            'url': 'https://www.rtl.lu/radio/5-vir-12/s/4033058.html',
            'info_dict': {
                'id': '4033058',
                'ext': 'mp3',
                'description': 'md5:f855a4f3e3235393ae47ed1db5d934b9',
                'title': '5 vir 12 - Stau um Stau',
                'thumbnail': 'https://static.rtl.lu/rtlg//2022/06/24/c9c19e5694a14be46a3647a3760e1f62.jpg',
            },
        },
    ]