]>
Commit | Line | Data |
---|---|---|
e3a3ed8a | 1 | import functools |
00a3e47b | 2 | import re |
00a3e47b SS |
3 | |
4 | from .common import InfoExtractor | |
5 | from ..networking.exceptions import HTTPError | |
6 | from ..utils import ( | |
7 | ExtractorError, | |
8 | bug_reports_message, | |
9 | clean_html, | |
10 | format_field, | |
11 | get_element_text_and_html_by_tag, | |
12 | int_or_none, | |
13 | url_or_none, | |
14 | ) | |
15 | from ..utils.traversal import traverse_obj | |
16 | ||
17 | ||
class BundestagIE(InfoExtractor):
    """Extractor for bundestag.de Mediathek URLs and dbtg.tv short links.

    Formats are gathered from two sources: an HLS playlist whose URL is built
    from ``_INSTANCE_FORMAT`` and the direct file links listed in the JSON
    served at ``_SHARE_URL``. Title and description are scraped from the HTML
    returned by ``_OVERLAY_URL``.
    """

    _VALID_URL = [
        r'https?://dbtg\.tv/[cf]vid/(?P<id>\d+)',
        r'https?://www\.bundestag\.de/mediathek/?\?(?:[^#]+&)?videoid=(?P<id>\d+)',
    ]
    _TESTS = [{
        'url': 'https://dbtg.tv/cvid/7605304',
        'info_dict': {
            'id': '7605304',
            'ext': 'mp4',
            'title': '145. Sitzung vom 15.12.2023, TOP 24 Barrierefreiheit',
            'description': 'md5:321a9dc6bdad201264c0045efc371561',
        },
    }, {
        'url': 'https://www.bundestag.de/mediathek?videoid=7602120&url=L21lZGlhdGhla292ZXJsYXk=&mod=mediathek',
        'info_dict': {
            'id': '7602120',
            'ext': 'mp4',
            'title': '130. Sitzung vom 18.10.2023, TOP 1 Befragung der Bundesregierung',
            'description': 'Befragung der Bundesregierung',
        },
    }, {
        'url': 'https://www.bundestag.de/mediathek?videoid=7604941#url=L21lZGlhdGhla292ZXJsYXk/dmlkZW9pZD03NjA0OTQx&mod=mediathek',
        'only_matching': True,
    }, {
        'url': 'http://dbtg.tv/fvid/3594346',
        'only_matching': True,
    }]

    # HTML fragment that carries the title (<h3>) and description (<p>)
    _OVERLAY_URL = 'https://www.bundestag.de/mediathekoverlay'
    # HLS master playlist; both `{0}` placeholders receive the video id
    _INSTANCE_FORMAT = 'https://cldf-wzw-od.r53.cdn.tv1.eu/13014bundestagod/_definst_/13014bundestag/ondemand/3777parlamentsfernsehen/archiv/app144277506/145293313/{0}/{0}_playlist.smil/playlist.m3u8'

    # JSON endpoint listing direct links; the video id is appended to it
    _SHARE_URL = 'https://webtv.bundestag.de/player/macros/_x_s-144277506/shareData.json?contentId='
    # Format details are parsed out of the file name part of the share links
    _SHARE_AUDIO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<bitrate>\d+)kb_(?P<channels>\w+)_\w+_\d+\.(?P<ext>\w+)'
    _SHARE_VIDEO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<width>\w+)_(?P<height>\w+)_(?P<bitrate>\d+)kb_\w+_\w+_\d+\.(?P<ext>\w+)'

    def _bt_extract_share_formats(self, video_id):
        """Yield format dicts for the direct links in the share-data JSON.

        This source is best-effort: a failed download or a response whose
        ``status.code`` is not ``1`` yields nothing instead of raising, so
        formats collected elsewhere are not discarded.

        Only entries whose key starts with ``audio`` or ``download`` and whose
        value is a valid URL are turned into formats.
        """
        share_data = self._download_json(
            f'{self._SHARE_URL}{video_id}', video_id,
            note='Downloading share format JSON', fatal=False)
        if share_data is False:
            # Non-fatal download/parse failure: expected to have been reported
            # by `_download_json` itself, so no extra warning is emitted here
            return

        if traverse_obj(share_data, ('status', 'code', {int})) != 1:
            self.report_warning(format_field(
                share_data, [('status', 'message', {str})],
                'Share API response: %s', default='Unknown Share API Error')
                + bug_reports_message())
            return

        for name, url in share_data.items():
            if not isinstance(name, str) or not url_or_none(url):
                continue

            if name.startswith('audio'):
                match = re.search(self._SHARE_AUDIO_REGEX, url)
                yield {
                    'format_id': name,
                    'url': url,
                    'vcodec': 'none',
                    # Yields no extra fields when the file name does not match
                    **traverse_obj(match, {
                        'acodec': 'codec',
                        'audio_channels': ('channels', {{'mono': 1, 'stereo': 2}.get}),
                        'abr': ('bitrate', {int_or_none}),
                        'ext': 'ext',
                    }),
                }

            elif name.startswith('download'):
                match = re.search(self._SHARE_VIDEO_REGEX, url)
                yield {
                    'format_id': name,
                    'url': url,
                    **traverse_obj(match, {
                        'vcodec': 'codec',
                        'tbr': ('bitrate', {int_or_none}),
                        'width': ('width', {int_or_none}),
                        'height': ('height', {int_or_none}),
                        'ext': 'ext',
                    }),
                }

    def _real_extract(self, url):
        """Build the info dict (id, formats, title, description) for ``url``.

        Raises ExtractorError (expected) when the HLS playlist request answers
        with HTTP 404; other HLS errors are only reported as warnings and the
        share-data links are still tried.
        """
        video_id = self._match_id(url)
        formats = []
        result = {'id': video_id, 'formats': formats}

        try:
            formats.extend(self._extract_m3u8_formats(
                self._INSTANCE_FORMAT.format(video_id), video_id, m3u8_id='instance'))
        except ExtractorError as error:
            # A 404 on the playlist is treated as "this id does not exist"
            if isinstance(error.cause, HTTPError) and error.cause.status == 404:
                raise ExtractorError('Could not find video id', expected=True)
            self.report_warning(f'Error extracting hls formats: {error}', video_id)
        formats.extend(self._bt_extract_share_formats(video_id))
        if not formats:
            self.raise_no_formats('Could not find suitable formats', video_id=video_id)

        # Metadata is optional: the overlay download is non-fatal
        result.update(traverse_obj(self._download_webpage(
            self._OVERLAY_URL, video_id,
            query={'videoid': video_id, 'view': 'main'},
            note='Downloading metadata overlay', fatal=False,
        ), {
            'title': (
                {functools.partial(get_element_text_and_html_by_tag, 'h3')}, 0,
                # Drop <span> elements from the heading before cleaning it
                {functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
            'description': ({functools.partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
        }))

        return result