]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/berufetv.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / berufetv.py
CommitLineData
9d69c4e4
F
1from .common import InfoExtractor
2from ..utils import float_or_none, mimetype2ext, traverse_obj
3
4
class BerufeTVIE(InfoExtractor):
    """Extractor for BERUFE.TV films served under web.arbeitsagentur.de/berufetv."""

    _VALID_URL = r'https?://(?:www\.)?web\.arbeitsagentur\.de/berufetv/[^?#]+/film;filmId=(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://web.arbeitsagentur.de/berufetv/studienberufe/wirtschaftswissenschaften/wirtschaftswissenschaften-volkswirtschaft/film;filmId=DvKC3DUpMKvUZ_6fEnfg3u',
        'md5': '041b6432ec8e6838f84a5c30f31cc795',
        'info_dict': {
            'id': 'DvKC3DUpMKvUZ_6fEnfg3u',
            'ext': 'mp4',
            'title': 'Volkswirtschaftslehre',
            'description': 'md5:6bd87d0c63163480a6489a37526ee1c1',
            'categories': ['Studien&shy;beruf'],
            'tags': ['Studienfilm'],
            'duration': 602.440,
            'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the film whose ID is embedded in *url*.

        Two JSON documents are fetched: an optional film-metadata listing
        (title, description, category, tags, thumbnail) and the mandatory
        player JSON that carries the video sources, duration and tracks.
        """
        video_id = self._match_id(url)

        # Descriptive metadata is optional: the request is non-fatal and
        # ``meta`` falls back to an empty dict when nothing matches.
        movie_metadata = self._download_json(
            'https://rest.arbeitsagentur.de/infosysbub/berufetv/pc/v1/film-metadata',
            video_id, 'Downloading JSON metadata',
            headers={'X-API-Key': '79089773-4892-4386-86e6-e8503669f426'}, fatal=False)

        # Pick the first entry of the listing whose 'miId' equals our video ID.
        meta = traverse_obj(
            movie_metadata, ('metadaten', lambda _, i: video_id == i['miId']),
            get_all=False, default={})

        video = self._download_json(
            f'https://d.video-cdn.net/play/player/8YRzUk6pTzmBdrsLe9Y88W/video/{video_id}',
            video_id, 'Downloading video JSON')

        formats, subtitles = [], {}
        for key, source in video['videoSources']['html'].items():
            # Only the first entry of each rendition list is used. An empty or
            # malformed list is skipped instead of aborting the extraction.
            source_url = traverse_obj(source, (0, 'source'))
            if not source_url:
                continue
            if key == 'auto':
                # The 'auto' rendition is passed to the m3u8 (HLS) helper.
                fmts, subs = self._extract_m3u8_formats_and_subtitles(source_url, video_id)
                formats.extend(fmts)
                # Merge rather than rebind so nothing already collected is lost.
                self._merge_subtitles(subs, target=subtitles)
            else:
                formats.append({
                    'url': source_url,
                    'ext': mimetype2ext(traverse_obj(source, (0, 'mimeType'))),
                    'format_id': key,
                })

        for track in video.get('videoTracks') or []:
            if track.get('type') != 'SUBTITLES':
                continue
            track_url = track.get('source')
            if not track_url:
                # Subtitles are optional; a track without a URL is unusable.
                continue
            # 'und' (undetermined) keeps a track that lacks a language tag.
            subtitles.setdefault(track.get('language') or 'und', []).append({
                'url': track_url,
                'name': track.get('label'),
                'ext': 'vtt',
            })

        return {
            'id': video_id,
            'title': meta.get('titel') or traverse_obj(video, ('videoMetaData', 'title')),
            'description': meta.get('beschreibung'),
            'thumbnail': meta.get('thumbnail') or f'https://asset-out-cdn.video-cdn.net/private/videos/{video_id}/thumbnails/active',
            # Divided by 1000; the player value is presumably in milliseconds.
            'duration': float_or_none(video.get('duration'), scale=1000),
            'categories': [meta['kategorie']] if meta.get('kategorie') else None,
            'tags': meta.get('themengebiete'),
            'subtitles': subtitles,
            'formats': formats,
        }