]>
Commit | Line | Data |
---|---|---|
1 | import base64 | |
2 | import functools | |
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | clean_html, | |
8 | determine_ext, | |
9 | float_or_none, | |
10 | int_or_none, | |
11 | make_archive_id, | |
12 | mimetype2ext, | |
13 | orderedSet, | |
14 | parse_age_limit, | |
15 | parse_iso8601, | |
16 | remove_end, | |
17 | str_or_none, | |
18 | strip_jsonp, | |
19 | try_call, | |
20 | unified_strdate, | |
21 | url_or_none, | |
22 | ) | |
23 | from ..utils.traversal import traverse_obj | |
24 | ||
25 | ||
class ORFRadioIE(InfoExtractor):
    """Extractor for ORF radio broadcasts.

    Handles ``<station>.orf.at/player/<date>/<show>`` and
    ``radiothek.orf.at/<station>/<date>/<show>`` URLs. A broadcast is returned
    as a playlist containing one entry per stream that carries a
    ``loopStreamId``.
    """
    IE_NAME = 'orf:radio'

    # URL station name -> (audio API station, loopstream channel, IE key used for old archive IDs)
    STATION_INFO = {
        'fm4': ('fm4', 'fm4', 'orffm4'),
        'noe': ('noe', 'oe2n', 'orfnoe'),
        'wien': ('wie', 'oe2w', 'orfwie'),
        'burgenland': ('bgl', 'oe2b', 'orfbgl'),
        'ooe': ('ooe', 'oe2o', 'orfooe'),
        'steiermark': ('stm', 'oe2st', 'orfstm'),
        'kaernten': ('ktn', 'oe2k', 'orfktn'),
        'salzburg': ('sbg', 'oe2s', 'orfsbg'),
        'tirol': ('tir', 'oe2t', 'orftir'),
        'vorarlberg': ('vbg', 'oe2v', 'orfvbg'),
        'oe3': ('oe3', 'oe3', 'orfoe3'),
        'oe1': ('oe1', 'oe1', 'orfoe1'),
    }
    _STATION_RE = '|'.join(map(re.escape, STATION_INFO.keys()))

    _VALID_URL = rf'''(?x)
        https?://(?:
            (?P<station>{_STATION_RE})\.orf\.at/player|
            radiothek\.orf\.at/(?P<station2>{_STATION_RE})
        )/(?P<date>[0-9]+)/(?P<show>\w+)'''

    _TESTS = [{
        'url': 'https://radiothek.orf.at/ooe/20220801/OGMO',
        'info_dict': {
            'id': 'OGMO',
            'title': 'Guten Morgen OÖ',
            'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
        },
        'playlist': [{
            'md5': 'f33147d954a326e338ea52572c2810e8',
            'info_dict': {
                'id': '2022-08-01_0459_tl_66_7DaysMon1_319062',
                'ext': 'mp3',
                'title': 'Guten Morgen OÖ',
                'upload_date': '20220801',
                'duration': 18000,
                'timestamp': 1659322789,
                'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
            },
        }],
    }, {
        'url': 'https://ooe.orf.at/player/20220801/OGMO',
        'info_dict': {
            'id': 'OGMO',
            'title': 'Guten Morgen OÖ',
            'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
        },
        'playlist': [{
            'md5': 'f33147d954a326e338ea52572c2810e8',
            'info_dict': {
                'id': '2022-08-01_0459_tl_66_7DaysMon1_319062',
                'ext': 'mp3',
                'title': 'Guten Morgen OÖ',
                'upload_date': '20220801',
                'duration': 18000,
                'timestamp': 1659322789,
                'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
            },
        }],
    }, {
        'url': 'http://fm4.orf.at/player/20170107/4CC',
        'only_matching': True,
    }, {
        'url': 'https://noe.orf.at/player/20200423/NGM',
        'only_matching': True,
    }, {
        'url': 'https://wien.orf.at/player/20200423/WGUM',
        'only_matching': True,
    }, {
        'url': 'https://burgenland.orf.at/player/20200423/BGM',
        'only_matching': True,
    }, {
        'url': 'https://steiermark.orf.at/player/20200423/STGMS',
        'only_matching': True,
    }, {
        'url': 'https://kaernten.orf.at/player/20200423/KGUMO',
        'only_matching': True,
    }, {
        'url': 'https://salzburg.orf.at/player/20200423/SGUM',
        'only_matching': True,
    }, {
        'url': 'https://tirol.orf.at/player/20200423/TGUMO',
        'only_matching': True,
    }, {
        'url': 'https://vorarlberg.orf.at/player/20200423/VGUM',
        'only_matching': True,
    }, {
        'url': 'https://oe3.orf.at/player/20200424/3WEK',
        'only_matching': True,
    }, {
        'url': 'http://oe1.orf.at/player/20170108/456544',
        'md5': '34d8a6e67ea888293741c86a099b745b',
        'info_dict': {
            'id': '2017-01-08_0759_tl_51_7DaysSun6_256141',
            'ext': 'mp3',
            'title': 'Morgenjournal',
            'duration': 609,
            'timestamp': 1483858796,
            'upload_date': '20170108',
        },
        'skip': 'Shows from ORF radios are only available for 7 days.',
    }]

    def _entries(self, data, station):
        """Yield one info dict per stream of the broadcast.

        @param data     Broadcast JSON as returned by the audio API
        @param station  Station name as it appears in the URL (a key of STATION_INFO)

        Streams without a ``loopStreamId`` are skipped. A response without a
        ``streams`` list yields nothing instead of raising.
        """
        _, loop_station, old_ie = self.STATION_INFO[station]

        # Broadcast-level metadata is the same for every stream
        title = data.get('title')
        description = clean_html(data.get('subtitle'))
        series = data.get('programTitle')

        for info in data.get('streams') or []:
            item_id = info.get('loopStreamId')
            if not item_id:
                continue
            # Only the trailing extension is dropped; the rest of the ID stays untouched
            video_id = remove_end(item_id, '.mp3')
            yield {
                'id': video_id,
                'ext': 'mp3',
                'url': f'https://loopstream01.apa.at/?channel={loop_station}&id={item_id}',
                '_old_archive_ids': [make_archive_id(old_ie, video_id)],
                'title': title,
                'description': description,
                # start/end are divided by 1000 here and scaled by 1000 for the timestamp
                'duration': try_call(lambda: (info['end'] - info['start']) / 1000),
                'timestamp': int_or_none(info.get('start'), scale=1000),
                'series': series,
            }

    def _real_extract(self, url):
        """Fetch the broadcast JSON for the URL and return it as a playlist."""
        mobj = self._match_valid_url(url)
        # Exactly one of the two station groups matches, depending on the URL flavour
        station = mobj.group('station') or mobj.group('station2')
        show_date, show_id = mobj.group('date', 'show')

        api_station, _, _ = self.STATION_INFO[station]
        data = self._download_json(
            f'http://audioapi.orf.at/{api_station}/api/json/current/broadcast/{show_id}/{show_date}', show_id)

        return self.playlist_result(
            self._entries(data, station), show_id, data.get('title'), clean_html(data.get('subtitle')))
160 | ||
161 | ||
class ORFPodcastIE(InfoExtractor):
    """Extractor for single podcast episodes hosted on sound.orf.at."""
    IE_NAME = 'orf:podcast'
    _STATION_RE = '|'.join(map(re.escape, (
        'bgl', 'fm4', 'ktn', 'noe', 'oe1', 'oe3',
        'ooe', 'sbg', 'stm', 'tir', 'tv', 'vbg', 'wie')))
    _VALID_URL = rf'https?://sound\.orf\.at/podcast/(?P<station>{_STATION_RE})/(?P<show>[\w-]+)/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://sound.orf.at/podcast/oe3/fruehstueck-bei-mir/nicolas-stockhammer-15102023',
        'md5': '526a5700e03d271a1505386a8721ab9b',
        'info_dict': {
            'id': 'nicolas-stockhammer-15102023',
            'ext': 'mp3',
            'title': 'Nicolas Stockhammer (15.10.2023)',
            'duration': 3396.0,
            'series': 'Frühstück bei mir',
        },
        'skip': 'ORF podcasts are only available for a limited time',
    }]

    def _real_extract(self, url):
        """Download the episode JSON and map its ``payload`` onto an info dict."""
        mobj = self._match_valid_url(url)
        station, show, episode_id = mobj.group('station', 'show', 'id')
        api_response = self._download_json(
            f'https://audioapi.orf.at/radiothek/api/2.0/podcast/{station}/{show}/{episode_id}', episode_id)

        # The API duration is divided by 1000 before being reported
        scaled_duration = functools.partial(float_or_none, scale=1000)
        payload_fields = {
            'url': ('enclosures', 0, 'url'),
            'ext': ('enclosures', 0, 'type', {mimetype2ext}),
            'title': 'title',
            'description': ('description', {clean_html}),
            'duration': ('duration', {scaled_duration}),
            'series': ('podcast', 'title'),
        }
        metadata = traverse_obj(api_response, ('payload', payload_fields))

        # Defaults come first so that values found in the payload (e.g. 'ext') win
        return {
            'id': episode_id,
            'ext': 'mp3',
            'vcodec': 'none',
            **metadata,
        }
199 | ||
200 | ||
class ORFIPTVIE(InfoExtractor):
    """Extractor for the video embedded in an iptv.orf.at story page."""
    IE_NAME = 'orf:iptv'
    IE_DESC = 'iptv.ORF.at'
    _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'

    _TEST = {
        'url': 'http://iptv.orf.at/stories/2275236/',
        'md5': 'c8b22af4718a4b4af58342529453e3e5',
        'info_dict': {
            'id': '350612',
            'ext': 'flv',
            'title': 'Weitere Evakuierungen um Vulkan Calbuco',
            'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
            'duration': 68.197,
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150425',
        },
    }

    def _real_extract(self, url):
        """Resolve the story page to its video ID, then collect formats and page metadata."""
        story_id = self._match_id(url)

        # Always fetch the canonical story URL, even if the input used the '#/' variant
        page = self._download_webpage(
            f'http://iptv.orf.at/stories/{story_id}', story_id)

        video_id = self._search_regex(
            r'data-video(?:id)?="(\d+)"', page, 'video id')

        file_info = self._download_json(
            f'http://bits.orf.at/filehandler/static-api/json/current/data.json?file={video_id}',
            video_id)[0]

        duration = float_or_none(file_info['duration'], 1000)

        source = file_info['sources']['default']
        balancer_url = source['loadBalancerUrl']
        # Technical details from the source description; only applied to the RTMP format below
        rtmp_details = {
            'abr': int_or_none(source.get('audioBitrate')),
            'vbr': int_or_none(source.get('bitrate')),
            'fps': int_or_none(source.get('videoFps')),
            'width': int_or_none(source.get('videoWidth')),
            'height': int_or_none(source.get('videoHeight')),
        }
        thumbnail = source.get('preview')

        rendition = self._download_json(
            balancer_url, video_id, transform_source=strip_jsonp)

        formats = []
        for format_id, format_url in rendition['redirect'].items():
            if format_id == 'rtmp':
                formats.append({
                    **rtmp_details,
                    'url': format_url,
                    'format_id': format_id,
                })
                continue

            # Anything that is neither RTMP nor a known manifest type is ignored
            manifest_ext = determine_ext(format_url)
            if manifest_ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_id))
            elif manifest_ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', m3u8_id=format_id))

        info = {
            'id': video_id,
            'title': remove_end(self._og_search_title(page), ' - iptv.ORF.at'),
            'description': self._og_search_description(page),
            'duration': duration,
            'thumbnail': thumbnail,
        }
        info['upload_date'] = unified_strdate(self._html_search_meta(
            'dc.date', page, 'upload date'))
        info['formats'] = formats
        return info
287 | ||
288 | ||
class ORFFM4StoryIE(InfoExtractor):
    """Extractor for fm4.orf.at story pages.

    Every distinct ``data-video``/``data-videoid`` attribute on the page
    becomes one entry of the returned playlist.
    """
    IE_NAME = 'orf:fm4:story'
    IE_DESC = 'fm4.orf.at stories'
    _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'

    _TEST = {
        'url': 'http://fm4.orf.at/stories/2865738/',
        'playlist': [{
            'md5': 'e1c2c706c45c7b34cf478bbf409907ca',
            'info_dict': {
                'id': '547792',
                'ext': 'flv',
                'title': 'Manu Delago und Inner Tongue live',
                'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
                'duration': 1748.52,
                'thumbnail': r're:^https?://.*\.jpg$',
                'upload_date': '20170913',
            },
        }, {
            'md5': 'c6dd2179731f86f4f55a7b49899d515f',
            'info_dict': {
                'id': '547798',
                'ext': 'flv',
                'title': 'Manu Delago und Inner Tongue live (2)',
                'duration': 1504.08,
                'thumbnail': r're:^https?://.*\.jpg$',
                'upload_date': '20170913',
                'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
            },
        }],
    }

    def _real_extract(self, url):
        """Return a playlist with one entry per video embedded in the story page."""
        story_id = self._match_id(url)
        webpage = self._download_webpage(url, story_id)

        all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
        if not all_ids:
            # Nothing embedded: return the empty playlist without touching page metadata
            return self.playlist_result([])

        # Page-level metadata is identical for every embedded video, so parse it only once
        page_title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
        description = self._og_search_description(webpage)
        upload_date = unified_strdate(self._html_search_meta(
            'dc.date', webpage, 'upload date'))

        entries = []
        for idx, video_id in enumerate(all_ids):
            data = self._download_json(
                f'http://bits.orf.at/filehandler/static-api/json/current/data.json?file={video_id}',
                video_id)[0]

            duration = float_or_none(data['duration'], 1000)

            video = data['sources']['q8c']
            load_balancer_url = video['loadBalancerUrl']
            # Technical details from the source description; only applied to the RTMP format below
            rtmp_details = {
                'abr': int_or_none(video.get('audioBitrate')),
                'vbr': int_or_none(video.get('bitrate')),
                'fps': int_or_none(video.get('videoFps')),
                'width': int_or_none(video.get('videoWidth')),
                'height': int_or_none(video.get('videoHeight')),
            }
            thumbnail = video.get('preview')

            rendition = self._download_json(
                load_balancer_url, video_id, transform_source=strip_jsonp)

            formats = []
            for format_id, format_url in rendition['redirect'].items():
                if format_id == 'rtmp':
                    formats.append({
                        **rtmp_details,
                        'url': format_url,
                        'format_id': format_id,
                    })
                    continue

                # Anything that is neither RTMP nor a known manifest type is ignored
                manifest_ext = determine_ext(format_url)
                if manifest_ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        format_url, video_id, f4m_id=format_id))
                elif manifest_ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4', m3u8_id=format_id))

            title = page_title
            if idx >= 1:
                # Titles are duplicates, make them unique
                title += ' (' + str(idx + 1) + ')'

            entries.append({
                'id': video_id,
                'title': title,
                'description': description,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'formats': formats,
            })

        return self.playlist_result(entries)
391 | ||
392 | ||
class ORFONIE(InfoExtractor):
    """Extractor for on.orf.at videos (whole episodes and individual segments)."""
    IE_NAME = 'orf:on'
    _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)(?:/(?P<segment>\d+))?'
    _TESTS = [{
        'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
        'info_dict': {
            'id': '14210000',
            'ext': 'mp4',
            'duration': 2651.08,
            'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg',
            'title': 'School of Champions (4/8)',
            'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
            'media_type': 'episode',
            'timestamp': 1706558922,
            'upload_date': '20240129',
            'release_timestamp': 1706472362,
            'release_date': '20240128',
            'modified_timestamp': 1712756663,
            'modified_date': '20240410',
            '_old_archive_ids': ['orftvthek 14210000'],
        },
    }, {
        'url': 'https://on.orf.at/video/3220355',
        'md5': 'f94d98e667cf9a3851317efb4e136662',
        'info_dict': {
            'id': '3220355',
            'ext': 'mp4',
            'duration': 445.04,
            'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0002/60/thumb_159573_segments_highlight_teaser.png',
            'title': '50 Jahre Burgenland: Der Festumzug',
            'description': 'md5:1560bf855119544ee8c4fa5376a2a6b0',
            'media_type': 'episode',
            'timestamp': 52916400,
            'upload_date': '19710905',
            'release_timestamp': 52916400,
            'release_date': '19710905',
            'modified_timestamp': 1498536049,
            'modified_date': '20170627',
            '_old_archive_ids': ['orftvthek 3220355'],
        },
    }, {
        # Video with multiple segments selecting the second segment
        'url': 'https://on.orf.at/video/14226549/15639808/jugendbande-einbrueche-aus-langeweile',
        'md5': '90f4ebff86b4580837b8a361d0232a9e',
        'info_dict': {
            'id': '15639808',
            'ext': 'mp4',
            'duration': 97.707,
            'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0175/43/thumb_17442704_segments_highlight_teaser.jpg',
            'title': 'Jugendbande: Einbrüche aus Langeweile',
            'description': 'md5:193df0bf0d91cf16830c211078097120',
            'media_type': 'segment',
            'timestamp': 1715792400,
            'upload_date': '20240515',
            'modified_timestamp': 1715794394,
            'modified_date': '20240515',
            '_old_archive_ids': ['orftvthek 15639808'],
        },
        'params': {'noplaylist': True},
    }, {
        # Video with multiple segments and no combined version
        'url': 'https://on.orf.at/video/14227864/formel-1-grosser-preis-von-monaco-2024',
        'info_dict': {
            '_type': 'multi_video',
            'id': '14227864',
            'duration': 18410.52,
            'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0176/04/thumb_17503881_segments_highlight_teaser.jpg',
            'title': 'Formel 1: Großer Preis von Monaco 2024',
            'description': 'md5:aeeb010710ccf70ce28ccb4482243d4f',
            'media_type': 'episode',
            'timestamp': 1716721200,
            'upload_date': '20240526',
            'release_timestamp': 1716721802,
            'release_date': '20240526',
            'modified_timestamp': 1716967501,
            'modified_date': '20240529',
        },
        'playlist_count': 42,
    }, {
        # Video with multiple segments, but with combined version
        'url': 'https://on.orf.at/video/14228172',
        'info_dict': {
            'id': '14228172',
            'ext': 'mp4',
            'duration': 3294.878,
            'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0176/17/thumb_17516455_segments_highlight_teaser.jpg',
            'title': 'Willkommen Österreich mit Stermann & Grissemann',
            'description': 'md5:5de034d033a9c27f989343be3bbd4839',
            'media_type': 'episode',
            'timestamp': 1716926584,
            'upload_date': '20240528',
            'release_timestamp': 1716919202,
            'release_date': '20240528',
            'modified_timestamp': 1716968045,
            'modified_date': '20240529',
            '_old_archive_ids': ['orftvthek 14228172'],
        },
    }]

    @staticmethod
    def _parse_metadata(api_json):
        """Map an episode or segment JSON object onto info dict metadata fields.

        Where several source keys are listed for a field, the first one that
        yields a value is used.
        """
        # The API duration is divided by 1000 before being reported
        scaled_duration = functools.partial(float_or_none, scale=1000)
        field_paths = {
            'id': ('id', {int}, {str_or_none}),
            'age_limit': ('age_classification', {parse_age_limit}),
            'duration': ('exact_duration', {scaled_duration}),
            'title': (('title', 'headline'), {str}),
            'description': (('description', 'teaser_text'), {str}),
            'media_type': ('video_type', {str}),
            'thumbnail': ('_embedded', 'image', 'public_urls', 'highlight_teaser', 'url', {url_or_none}),
            'timestamp': (('date', 'episode_date'), {parse_iso8601}),
            'release_timestamp': ('release_date', {parse_iso8601}),
            'modified_timestamp': ('updated_at', {parse_iso8601}),
        }
        return traverse_obj(api_json, field_paths, get_all=False)

    def _extract_video_info(self, video_id, api_json):
        """Build the info dict (formats, subtitles, metadata) for one episode or segment.

        @param video_id  ID used for reporting and for the old archive ID
        @param api_json  Episode or segment JSON object containing 'sources'
        """
        # Manifest types we know how to handle; any other type under 'sources' is skipped
        manifest_extractors = {
            'hls': functools.partial(self._extract_m3u8_formats_and_subtitles, m3u8_id='hls'),
            'dash': functools.partial(self._extract_mpd_formats_and_subtitles, mpd_id='dash'),
        }

        formats, subtitles = [], {}
        for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
            extract_manifest = manifest_extractors.get(manifest_type)
            if extract_manifest is None:
                continue
            manifest_urls = traverse_obj(
                api_json, ('sources', manifest_type, ..., 'src', {url_or_none}))
            for manifest_url in manifest_urls:
                fmts, subs = extract_manifest(manifest_url, video_id, fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)

        # Standalone subtitle files are all filed under 'de'
        subtitle_keys = ('xml_url', 'sami_url', 'stl_url', 'ttml_url', 'srt_url', 'vtt_url')
        subtitle_urls = traverse_obj(
            api_json, ('_embedded', 'subtitle', subtitle_keys, {url_or_none}))
        for sub_url in subtitle_urls:
            self._merge_subtitles({'de': [{'url': sub_url}]}, target=subtitles)

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            '_old_archive_ids': [make_archive_id('ORFTVthek', video_id)],
        }
        # Parsed metadata is applied last, so its 'id' replaces the one set above when present
        return {**info, **self._parse_metadata(api_json)}

    def _real_extract(self, url):
        """Return a single segment, the combined episode, or a multi_video playlist of segments."""
        mobj = self._match_valid_url(url)
        video_id, segment_id = mobj.group('id', 'segment')

        # The episode endpoint takes a base64-encoded ID built from a fixed prefix plus the video ID
        plain_id = f'3dSlfek03nsLKdj4Jsd{video_id}'
        encrypted_id = base64.b64encode(plain_id.encode()).decode()
        api_json = self._download_json(
            f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id)

        if traverse_obj(api_json, 'is_drm_protected'):
            self.report_drm(video_id)

        segments = traverse_obj(api_json, ('_embedded', 'segments', lambda _, v: v['id']))
        selected_segment = traverse_obj(segments, (lambda _, v: str(v['id']) == segment_id, any))

        # selected_segment will be falsy if input URL did not include a valid segment_id
        if selected_segment:
            wants_episode = self._yes_playlist(
                video_id, segment_id, playlist_label='episode', video_label='segment')
            if not wants_episode:
                return self._extract_video_info(segment_id, selected_segment)

        # Even some segmented videos have an unsegmented version available in API response root
        if not self._configuration_arg('prefer_segments_playlist'):
            if traverse_obj(api_json, ('sources', ..., ..., 'src', {url_or_none})):
                return self._extract_video_info(video_id, api_json)

        # Kept as a generator so that segments are only processed when the playlist is consumed
        segment_entries = (
            self._extract_video_info(str(segment['id']), segment) for segment in segments)
        return self.playlist_result(
            segment_entries, video_id, **self._parse_metadata(api_json), multi_video=True)