]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/orf.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / orf.py
1 import base64
2 import functools
3 import re
4
5 from .common import InfoExtractor
6 from ..networking import HEADRequest
7 from ..utils import (
8 InAdvancePagedList,
9 clean_html,
10 determine_ext,
11 float_or_none,
12 int_or_none,
13 join_nonempty,
14 make_archive_id,
15 mimetype2ext,
16 orderedSet,
17 parse_age_limit,
18 remove_end,
19 smuggle_url,
20 strip_jsonp,
21 try_call,
22 unescapeHTML,
23 unified_strdate,
24 unsmuggle_url,
25 url_or_none,
26 )
27 from ..utils.traversal import traverse_obj
28
29
class ORFTVthekIE(InfoExtractor):
    """Extractor for tvthek.orf.at pages.

    A page is returned as a playlist with one entry per segment found in the
    ``VideoPlaylist`` JSON embedded in the page. When the playlist is declined
    (see ``_yes_playlist``) only the segment whose id matches the trailing id
    of the URL is kept.
    """
    IE_NAME = 'orf:tvthek'
    IE_DESC = 'ORF TVthek'
    _VALID_URL = r'(?P<url>https?://tvthek\.orf\.at/(?:(?:[^/]+/){2}){1,2}(?P<id>\d+))(/[^/]+/(?P<vid>\d+))?(?:$|[?#])'

    _TESTS = [{
        'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079',
        'info_dict': {
            'id': '14121079',
        },
        'playlist_count': 11,
        'params': {'noplaylist': True}
    }, {
        'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150',
        'info_dict': {
            'id': '14121079',
        },
        'playlist_count': 1,
        'params': {'playlist_items': '5'}
    }, {
        'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150',
        'info_dict': {
            'id': '14121079',
        },
        'playlist': [{
            'info_dict': {
                'id': '15083150',
                'ext': 'mp4',
                'description': 'md5:7be1c485425f5f255a5e4e4815e77d04',
                'thumbnail': 'https://api-tvthek.orf.at/uploads/media/segments/0130/59/824271ea35cd8931a0fb08ab316a5b0a1562342c.jpeg',
                'title': 'Umfrage: Welches Tier ist Sebastian Kurz?',
            }
        }],
        'playlist_count': 1,
        'params': {'noplaylist': True, 'skip_download': 'm3u8'}
    }, {
        'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
        'playlist': [{
            'md5': '2942210346ed779588f428a92db88712',
            'info_dict': {
                'id': '8896777',
                'ext': 'mp4',
                'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
                'description': 'md5:c1272f0245537812d4e36419c207b67d',
                'duration': 2668,
                'upload_date': '20141208',
            },
        }],
        'skip': 'Blocked outside of Austria / Germany',
    }, {
        'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
        'info_dict': {
            'id': '7982259',
            'ext': 'mp4',
            'title': 'Best of Ingrid Thurnher',
            'upload_date': '20140527',
            'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
        },
        'params': {
            'skip_download': True,  # rtsp downloads
        },
        'skip': 'Blocked outside of Austria / Germany',
    }, {
        'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
        'only_matching': True,
    }, {
        'url': 'http://tvthek.orf.at/profile/Universum/35429',
        'only_matching': True,
    }]

    def _pagefunc(self, url, data_jsb, n, *, image=None):
        """Yield the info dict for segment number ``n`` of ``data_jsb``.

        This is the page function handed to ``InAdvancePagedList`` with a
        page size of one, so each call produces exactly one entry.

        url      -- base page URL, used to build the entry's webpage_url
        data_jsb -- list of segment dicts from the page's playlist JSON
        n        -- index of the segment to process
        image    -- fallback thumbnail URL used when the segment itself has
                    no 'image_full_url'
        """
        sd = data_jsb[n]
        video_id, title = str(sd['id']), sd['title']

        formats = []
        for fd in sd['sources']:
            src = url_or_none(fd.get('src'))
            if not src:
                continue
            format_id = join_nonempty('delivery', 'quality', 'quality_string', from_dict=fd)
            ext = determine_ext(src)
            if ext == 'm3u8':
                m3u8_formats = self._extract_m3u8_formats(
                    src, video_id, 'mp4', m3u8_id=format_id, fatal=False,
                    note=f'Downloading {format_id} m3u8 manifest')
                # A manifest pointing at a "/geoprotection" URL is treated as geo-blocked
                if any('/geoprotection' in f['url'] for f in m3u8_formats):
                    self.raise_geo_restricted()
                formats.extend(m3u8_formats)
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    src, video_id, f4m_id=format_id, fatal=False))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    src, video_id, mpd_id=format_id, fatal=False,
                    note=f'Downloading {format_id} mpd manifest'))
            else:
                formats.append({
                    'format_id': format_id,
                    'url': src,
                    'protocol': fd.get('protocol'),
                })

        # Check for geoblocking.
        # There is a property is_geoprotection, but that's always false
        geo_str = sd.get('geoprotection_string')
        http_url = next(
            (f['url'] for f in formats if re.match(r'^https?://.*\.mp4$', f['url'])),
            None) if geo_str else None
        if http_url:
            # Non-fatal probe: a failure only produces the warning below
            self._request_webpage(
                HEADRequest(http_url), video_id, fatal=False, note='Testing for geoblocking',
                errnote=f'This video seems to be blocked outside of {geo_str}. You may want to try the streaming-* formats')

        subtitles = {}
        # `.get(key, default)` only covers a missing key; an explicit null
        # would otherwise break the iteration or become a None language key
        for sub in sd.get('subtitles') or []:
            sub_src = sub.get('src')
            if not sub_src:
                continue
            subtitles.setdefault(sub.get('lang') or 'de-AT', []).append({
                'url': sub_src,
            })

        upload_date = unified_strdate(sd.get('created_date'))

        thumbnails = []
        preview = sd.get('preview_image_url')
        if preview:
            thumbnails.append({
                'id': 'preview',
                'url': preview,
                'preference': 0,
            })
        image = sd.get('image_full_url') or image
        if image:
            thumbnails.append({
                'id': 'full',
                'url': image,
                'preference': 1,
            })

        yield {
            'id': video_id,
            'title': title,
            # Smuggled flag makes re-extraction of this URL return only this segment
            'webpage_url': smuggle_url(f'{url}/part/{video_id}', {'force_noplaylist': True}),
            'formats': formats,
            'subtitles': subtitles,
            'description': sd.get('description'),
            'duration': int_or_none(sd.get('duration_in_seconds')),
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }

    def _real_extract(self, url):
        """Return a playlist whose entries are lazily built by ``_pagefunc``."""
        url, smuggled_data = unsmuggle_url(url)
        playlist_id, video_id, base_url = self._match_valid_url(url).group('id', 'vid', 'url')
        webpage = self._download_webpage(url, playlist_id)

        data_jsb = self._parse_json(
            self._search_regex(
                r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
                webpage, 'playlist', group='json'),
            playlist_id, transform_source=unescapeHTML)['playlist']['videos']

        if not self._yes_playlist(playlist_id, video_id, smuggled_data):
            # Keep only the segment addressed by the URL
            data_jsb = [sd for sd in data_jsb if str(sd.get('id')) == video_id]

        playlist_count = len(data_jsb)
        # The page's og:image is only used as a fallback for a single-entry result
        image = self._og_search_thumbnail(webpage) if playlist_count == 1 else None

        page_func = functools.partial(self._pagefunc, base_url, data_jsb, image=image)
        return {
            '_type': 'playlist',
            'entries': InAdvancePagedList(page_func, playlist_count, 1),
            'id': playlist_id,
        }
202
203
class ORFRadioIE(InfoExtractor):
    """Extractor for ORF radio broadcasts (station player and radiothek URLs)."""
    IE_NAME = 'orf:radio'

    # URL station name -> (audio API station, loopstream channel, old extractor key)
    STATION_INFO = {
        'fm4': ('fm4', 'fm4', 'orffm4'),
        'noe': ('noe', 'oe2n', 'orfnoe'),
        'wien': ('wie', 'oe2w', 'orfwie'),
        'burgenland': ('bgl', 'oe2b', 'orfbgl'),
        'ooe': ('ooe', 'oe2o', 'orfooe'),
        'steiermark': ('stm', 'oe2st', 'orfstm'),
        'kaernten': ('ktn', 'oe2k', 'orfktn'),
        'salzburg': ('sbg', 'oe2s', 'orfsbg'),
        'tirol': ('tir', 'oe2t', 'orftir'),
        'vorarlberg': ('vbg', 'oe2v', 'orfvbg'),
        'oe3': ('oe3', 'oe3', 'orfoe3'),
        'oe1': ('oe1', 'oe1', 'orfoe1'),
    }
    _STATION_RE = '|'.join(map(re.escape, STATION_INFO))

    _VALID_URL = rf'''(?x)
        https?://(?:
            (?P<station>{_STATION_RE})\.orf\.at/player|
            radiothek\.orf\.at/(?P<station2>{_STATION_RE})
        )/(?P<date>[0-9]+)/(?P<show>\w+)'''

    _TESTS = [{
        'url': 'https://radiothek.orf.at/ooe/20220801/OGMO',
        'info_dict': {
            'id': 'OGMO',
            'title': 'Guten Morgen OÖ',
            'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
        },
        'playlist': [{
            'md5': 'f33147d954a326e338ea52572c2810e8',
            'info_dict': {
                'id': '2022-08-01_0459_tl_66_7DaysMon1_319062',
                'ext': 'mp3',
                'title': 'Guten Morgen OÖ',
                'upload_date': '20220801',
                'duration': 18000,
                'timestamp': 1659322789,
                'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
            }
        }]
    }, {
        'url': 'https://ooe.orf.at/player/20220801/OGMO',
        'info_dict': {
            'id': 'OGMO',
            'title': 'Guten Morgen OÖ',
            'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
        },
        'playlist': [{
            'md5': 'f33147d954a326e338ea52572c2810e8',
            'info_dict': {
                'id': '2022-08-01_0459_tl_66_7DaysMon1_319062',
                'ext': 'mp3',
                'title': 'Guten Morgen OÖ',
                'upload_date': '20220801',
                'duration': 18000,
                'timestamp': 1659322789,
                'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
            }
        }]
    }, {
        'url': 'http://fm4.orf.at/player/20170107/4CC',
        'only_matching': True,
    }, {
        'url': 'https://noe.orf.at/player/20200423/NGM',
        'only_matching': True,
    }, {
        'url': 'https://wien.orf.at/player/20200423/WGUM',
        'only_matching': True,
    }, {
        'url': 'https://burgenland.orf.at/player/20200423/BGM',
        'only_matching': True,
    }, {
        'url': 'https://steiermark.orf.at/player/20200423/STGMS',
        'only_matching': True,
    }, {
        'url': 'https://kaernten.orf.at/player/20200423/KGUMO',
        'only_matching': True,
    }, {
        'url': 'https://salzburg.orf.at/player/20200423/SGUM',
        'only_matching': True,
    }, {
        'url': 'https://tirol.orf.at/player/20200423/TGUMO',
        'only_matching': True,
    }, {
        'url': 'https://vorarlberg.orf.at/player/20200423/VGUM',
        'only_matching': True,
    }, {
        'url': 'https://oe3.orf.at/player/20200424/3WEK',
        'only_matching': True,
    }, {
        'url': 'http://oe1.orf.at/player/20170108/456544',
        'md5': '34d8a6e67ea888293741c86a099b745b',
        'info_dict': {
            'id': '2017-01-08_0759_tl_51_7DaysSun6_256141',
            'ext': 'mp3',
            'title': 'Morgenjournal',
            'duration': 609,
            'timestamp': 1483858796,
            'upload_date': '20170108',
        },
        'skip': 'Shows from ORF radios are only available for 7 days.'
    }]

    def _entries(self, data, station):
        """Yield one info dict per stream of ``data`` that has a 'loopStreamId'.

        data    -- broadcast JSON returned by the audio API
        station -- key into STATION_INFO
        """
        loop_station, legacy_ie_key = self.STATION_INFO[station][1:]
        for stream in data['streams']:
            stream_id = stream.get('loopStreamId')
            if stream_id:
                clip_id = stream_id.replace('.mp3', '')
                yield {
                    'id': clip_id,
                    'ext': 'mp3',
                    'url': f'https://loopstream01.apa.at/?channel={loop_station}&id={stream_id}',
                    # Archive id under the old extractor key for this station
                    '_old_archive_ids': [make_archive_id(legacy_ie_key, clip_id)],
                    'title': data.get('title'),
                    'description': clean_html(data.get('subtitle')),
                    # 'start'/'end' are divided by 1000; try_call yields None if either is unusable
                    'duration': try_call(lambda: (stream['end'] - stream['start']) / 1000),
                    'timestamp': int_or_none(stream.get('start'), scale=1000),
                    'series': data.get('programTitle'),
                }

    def _real_extract(self, url):
        """Fetch the broadcast JSON for the show and wrap its streams in a playlist."""
        mobj = self._match_valid_url(url)
        # Exactly one of the two station groups can match, depending on the host
        station_key = mobj.group('station') or mobj.group('station2')
        show_date, show_id = mobj.group('date', 'show')

        api_station = self.STATION_INFO[station_key][0]
        data = self._download_json(
            f'http://audioapi.orf.at/{api_station}/api/json/current/broadcast/{show_id}/{show_date}', show_id)

        playlist_title = data.get('title')
        playlist_description = clean_html(data.get('subtitle'))
        return self.playlist_result(
            self._entries(data, station_key), show_id, playlist_title, playlist_description)
338
339
class ORFPodcastIE(InfoExtractor):
    """Extractor for single podcast episodes on sound.orf.at."""
    IE_NAME = 'orf:podcast'
    _STATION_RE = '|'.join(map(re.escape, (
        'bgl', 'fm4', 'ktn', 'noe', 'oe1', 'oe3',
        'ooe', 'sbg', 'stm', 'tir', 'tv', 'vbg', 'wie')))
    _VALID_URL = rf'https?://sound\.orf\.at/podcast/(?P<station>{_STATION_RE})/(?P<show>[\w-]+)/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://sound.orf.at/podcast/oe3/fruehstueck-bei-mir/nicolas-stockhammer-15102023',
        'md5': '526a5700e03d271a1505386a8721ab9b',
        'info_dict': {
            'id': 'nicolas-stockhammer-15102023',
            'ext': 'mp3',
            'title': 'Nicolas Stockhammer (15.10.2023)',
            'duration': 3396.0,
            'series': 'Frühstück bei mir',
        },
        'skip': 'ORF podcasts are only available for a limited time'
    }]

    def _real_extract(self, url):
        """Query the radiothek podcast API and map its payload to an info dict."""
        mobj = self._match_valid_url(url)
        station, show, show_id = mobj.group('station'), mobj.group('show'), mobj.group('id')

        api_url = f'https://audioapi.orf.at/radiothek/api/2.0/podcast/{station}/{show}/{show_id}'
        data = self._download_json(api_url, show_id)

        to_seconds = functools.partial(float_or_none, scale=1000)
        payload_fields = traverse_obj(data, ('payload', {
            'url': ('enclosures', 0, 'url'),
            'ext': ('enclosures', 0, 'type', {mimetype2ext}),
            'title': 'title',
            'description': ('description', {clean_html}),
            'duration': ('duration', {to_seconds}),
            'series': ('podcast', 'title'),
        }))

        return {
            'id': show_id,
            # Default extension; replaced below when the enclosure type maps to one
            'ext': 'mp3',
            'vcodec': 'none',
            **payload_fields,
        }
377
378
class ORFIPTVIE(InfoExtractor):
    """Extractor for the video embedded in an iptv.orf.at story page."""
    IE_NAME = 'orf:iptv'
    IE_DESC = 'iptv.ORF.at'
    _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'

    _TEST = {
        'url': 'http://iptv.orf.at/stories/2275236/',
        'md5': 'c8b22af4718a4b4af58342529453e3e5',
        'info_dict': {
            'id': '350612',
            'ext': 'flv',
            'title': 'Weitere Evakuierungen um Vulkan Calbuco',
            'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
            'duration': 68.197,
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150425',
        },
    }

    def _real_extract(self, url):
        """Resolve the story's video id, then build formats from the load balancer reply."""
        story_id = self._match_id(url)

        # The page is always fetched from the canonical (non "#/") story URL
        webpage = self._download_webpage(
            f'http://iptv.orf.at/stories/{story_id}', story_id)

        video_id = self._search_regex(
            r'data-video(?:id)?="(\d+)"', webpage, 'video id')

        data = self._download_json(
            f'http://bits.orf.at/filehandler/static-api/json/current/data.json?file={video_id}',
            video_id)[0]

        # Duration is optional metadata: a missing field must not abort extraction
        duration = float_or_none(data.get('duration'), 1000)

        video = data['sources']['default']
        rendition = self._download_json(
            video['loadBalancerUrl'], video_id, transform_source=strip_jsonp)

        # Stream properties from the API; they are attached to the rtmp format only
        rtmp_properties = {
            'abr': int_or_none(video.get('audioBitrate')),
            'vbr': int_or_none(video.get('bitrate')),
            'fps': int_or_none(video.get('videoFps')),
            'width': int_or_none(video.get('videoWidth')),
            'height': int_or_none(video.get('videoHeight')),
        }

        formats = []
        for format_id, format_url in rendition['redirect'].items():
            if format_id == 'rtmp':
                formats.append({
                    **rtmp_properties,
                    'url': format_url,
                    'format_id': format_id,
                })
                continue
            ext = determine_ext(format_url)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_id))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', m3u8_id=format_id))
            # Renditions of any other kind are ignored

        return {
            'id': video_id,
            'title': remove_end(self._og_search_title(webpage), ' - iptv.ORF.at'),
            'description': self._og_search_description(webpage),
            'duration': duration,
            'thumbnail': video.get('preview'),
            'upload_date': unified_strdate(self._html_search_meta(
                'dc.date', webpage, 'upload date')),
            'formats': formats,
        }
465
466
class ORFFM4StoryIE(InfoExtractor):
    """Extractor for the videos embedded in an fm4.orf.at story page.

    Every distinct ``data-video``/``data-videoid`` attribute on the page
    becomes one entry of the returned playlist.
    """
    IE_NAME = 'orf:fm4:story'
    IE_DESC = 'fm4.orf.at stories'
    _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'

    _TEST = {
        'url': 'http://fm4.orf.at/stories/2865738/',
        'playlist': [{
            'md5': 'e1c2c706c45c7b34cf478bbf409907ca',
            'info_dict': {
                'id': '547792',
                'ext': 'flv',
                'title': 'Manu Delago und Inner Tongue live',
                'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
                'duration': 1748.52,
                'thumbnail': r're:^https?://.*\.jpg$',
                'upload_date': '20170913',
            },
        }, {
            'md5': 'c6dd2179731f86f4f55a7b49899d515f',
            'info_dict': {
                'id': '547798',
                'ext': 'flv',
                'title': 'Manu Delago und Inner Tongue live (2)',
                'duration': 1504.08,
                'thumbnail': r're:^https?://.*\.jpg$',
                'upload_date': '20170913',
                'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
            },
        }],
    }

    def _real_extract(self, url):
        """Build one playlist entry per video id found in the story page."""
        story_id = self._match_id(url)
        webpage = self._download_webpage(url, story_id)

        # Page-level metadata is the same for every embedded video, so it is
        # extracted once instead of on every loop iteration
        base_title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
        description = self._og_search_description(webpage)
        upload_date = unified_strdate(self._html_search_meta(
            'dc.date', webpage, 'upload date'))

        entries = []
        all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
        for idx, video_id in enumerate(all_ids):
            data = self._download_json(
                f'http://bits.orf.at/filehandler/static-api/json/current/data.json?file={video_id}',
                video_id)[0]

            # Duration is optional metadata: a missing field must not abort extraction
            duration = float_or_none(data.get('duration'), 1000)

            video = data['sources']['q8c']
            rendition = self._download_json(
                video['loadBalancerUrl'], video_id, transform_source=strip_jsonp)

            # Stream properties from the API; they are attached to the rtmp format only
            rtmp_properties = {
                'abr': int_or_none(video.get('audioBitrate')),
                'vbr': int_or_none(video.get('bitrate')),
                'fps': int_or_none(video.get('videoFps')),
                'width': int_or_none(video.get('videoWidth')),
                'height': int_or_none(video.get('videoHeight')),
            }

            formats = []
            for format_id, format_url in rendition['redirect'].items():
                if format_id == 'rtmp':
                    formats.append({
                        **rtmp_properties,
                        'url': format_url,
                        'format_id': format_id,
                    })
                    continue
                ext = determine_ext(format_url)
                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        format_url, video_id, f4m_id=format_id))
                elif ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4', m3u8_id=format_id))
                # Renditions of any other kind are ignored

            title = base_title
            if idx >= 1:
                # Titles are duplicates, make them unique
                title += f' ({idx + 1})'

            entries.append({
                'id': video_id,
                'title': title,
                'description': description,
                'duration': duration,
                'thumbnail': video.get('preview'),
                'upload_date': upload_date,
                'formats': formats,
            })

        return self.playlist_result(entries)
569
570
class ORFONIE(InfoExtractor):
    """Extractor for on.orf.at video pages."""
    IE_NAME = 'orf:on'
    _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
        'info_dict': {
            'id': '14210000',
            'ext': 'mp4',
            'duration': 2651.08,
            'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg',
            'title': 'School of Champions (4/8)',
            'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
            'media_type': 'episode',
            'timestamp': 1706472362,
            'upload_date': '20240128',
        }
    }, {
        'url': 'https://on.orf.at/video/3220355',
        'md5': 'f94d98e667cf9a3851317efb4e136662',
        'info_dict': {
            'id': '3220355',
            'ext': 'mp4',
            'duration': 445.04,
            'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0002/60/thumb_159573_segments_highlight_teaser.png',
            'title': '50 Jahre Burgenland: Der Festumzug',
            'description': 'md5:1560bf855119544ee8c4fa5376a2a6b0',
            'media_type': 'episode',
            'timestamp': 52916400,
            'upload_date': '19710905',
        }
    }]

    def _extract_video(self, video_id):
        """Fetch the episode JSON for ``video_id`` and return its formats,
        subtitles and the metadata fields present in the API response."""
        # The API path segment is the base64 of a fixed prefix followed by the video id
        id_token = f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()
        encrypted_id = base64.b64encode(id_token).decode()
        api_json = self._download_json(
            f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id)

        if traverse_obj(api_json, 'is_drm_protected'):
            self.report_drm(video_id)

        # Manifest type -> extraction method; other source types are skipped
        manifest_extractors = {
            'hls': functools.partial(self._extract_m3u8_formats_and_subtitles, m3u8_id='hls'),
            'dash': functools.partial(self._extract_mpd_formats_and_subtitles, mpd_id='dash'),
        }

        formats, subtitles = [], {}
        for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
            extract_manifest = manifest_extractors.get(manifest_type)
            if extract_manifest is None:
                continue
            manifest_urls = traverse_obj(
                api_json, ('sources', manifest_type, ..., 'src', {url_or_none}))
            for manifest_url in manifest_urls:
                fmts, subs = extract_manifest(manifest_url, video_id, fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)

        # Standalone subtitle files are all filed under the 'de' language key
        subtitle_url_keys = ('xml_url', 'sami_url', 'stl_url', 'ttml_url', 'srt_url', 'vtt_url')
        for sub_url in traverse_obj(api_json, (
                '_embedded', 'subtitle', subtitle_url_keys, {url_or_none})):
            self._merge_subtitles({'de': [{'url': sub_url}]}, target=subtitles)

        metadata = traverse_obj(api_json, {
            'age_limit': ('age_classification', {parse_age_limit}),
            'duration': ('duration_second', {float_or_none}),
            'title': (('title', 'headline'), {str}),
            'description': (('description', 'teaser_text'), {str}),
            'media_type': ('video_type', {str}),
        }, get_all=False)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }

    def _real_extract(self, url):
        """Combine page metadata, JSON-LD and API data; later sources override earlier ones."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        info = {
            'id': video_id,
            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
            'description': self._html_search_meta(
                ['description', 'og:description', 'twitter:description'], webpage, default=None),
        }
        info.update(self._search_json_ld(webpage, video_id, fatal=False))
        info.update(self._extract_video(video_id))
        return info