]>
Commit | Line | Data |
---|---|---|
ae43a4b9 | 1 | from .common import InfoExtractor |
2 | from ..utils import ( | |
3 | ExtractorError, | |
4 | traverse_obj, | |
5 | unified_timestamp, | |
6 | ) | |
7 | ||
8 | ||
class HSEShowBaseInfoExtractor(InfoExtractor):
    """Shared helpers for the hse.de extractors defined in this module.

    Provides retrieval of the JSON blob assigned to ``window.__REDUX_DATA__``
    on a page, and conversion of a list of source entries into formats and
    subtitles.
    """

    _GEO_COUNTRIES = ['DE']

    def _extract_redux_data(self, url, video_id):
        """Download *url* and return the parsed ``window.__REDUX_DATA__`` object.

        The page is searched for the ``window.__REDUX_DATA__ = {...}``
        assignment; the captured text has its newlines removed and is then
        parsed as JSON.
        """
        webpage = self._download_webpage(url, video_id)
        redux = self._html_search_regex(
            r'window\.__REDUX_DATA__\s*=\s*({.*});?', webpage, 'redux data')
        # NOTE(review): newlines are stripped before parsing; presumably the
        # HTML post-processing done by _html_search_regex can introduce them
        # inside the matched text - confirm before changing.
        return self._parse_json(redux.replace('\n', ''), video_id)

    def _extract_formats_and_subtitles(self, sources, video_id):
        """Build formats and subtitles from the HLS entries in *sources*.

        Only entries whose ``mimetype`` is ``application/x-mpegURL`` are used;
        every other entry, and any entry lacking a ``url``, is skipped.

        Returns a ``(formats, subtitles)`` tuple.
        Raises ExtractorError (expected) when *sources* is empty or missing.
        """
        if not sources:
            raise ExtractorError('No video found', expected=True, video_id=video_id)
        formats, subtitles = [], {}
        for src in sources:
            # Use .get() so one malformed entry does not abort the whole
            # extraction with a KeyError.
            if src.get('mimetype') != 'application/x-mpegURL':
                continue
            manifest_url = src.get('url')
            if not manifest_url:
                continue
            fmts, subs = self._extract_m3u8_formats_and_subtitles(manifest_url, video_id, ext='mp4')
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles
29 | ||
30 | ||
class HSEShowIE(HSEShowBaseInfoExtractor):
    """Extractor for hse.de TV show pages (``/dpl/c/tv-shows/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?hse\.de/dpl/c/tv-shows/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.hse.de/dpl/c/tv-shows/505350',
        'info_dict': {
            'id': '505350',
            'ext': 'mp4',
            'title': 'Pfeffinger Mode & Accessoires',
            'timestamp': 1638810000,
            'upload_date': '20211206',
            'channel': 'HSE24',
            'uploader': 'Arina Pirayesh'
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Return the info dict for the TV show page at *url*.

        Formats and subtitles come from the ``tvShowPage.tvShowVideo.sources``
        list of the page's redux data; the remaining metadata is read from
        ``tvShowPage.tvShow``.
        """
        video_id = self._match_id(url)
        json_data = self._extract_redux_data(url, video_id)
        formats, subtitles = self._extract_formats_and_subtitles(
            traverse_obj(json_data, ('tvShowPage', 'tvShowVideo', 'sources')), video_id)

        show = traverse_obj(json_data, ('tvShowPage', 'tvShow')) or {}
        return {
            'id': video_id,
            'title': show.get('title') or video_id,
            'formats': formats,
            # The date and hour fields are joined into one string and handed
            # to unified_timestamp for parsing.
            'timestamp': unified_timestamp(f'{show.get("date")} {show.get("hour")}:00'),
            # The sources are read from tvShowPage.tvShowVideo, so look for the
            # poster on that same object first; the previous top-level path is
            # kept as a fallback.
            'thumbnail': traverse_obj(
                json_data, ('tvShowPage', 'tvShowVideo', 'poster'), ('tvShowVideo', 'poster')),
            # The third argument is the field name used when reporting a
            # failed match, so name the field rather than passing the video id.
            'channel': self._search_regex(
                r'tvShow \| ([A-Z0-9]+)_', show.get('actionFieldText') or '', 'channel', fatal=False),
            'uploader': show.get('presenter'),
            'subtitles': subtitles,
        }
65 | ||
66 | ||
class HSEProductIE(HSEShowBaseInfoExtractor):
    """Extractor for hse.de product pages (``/dpl/p/product/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?hse\.de/dpl/p/product/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.hse.de/dpl/p/product/408630',
        'info_dict': {
            'id': '408630',
            'ext': 'mp4',
            'title': 'Hose im Ponte-Mix',
            'uploader': 'Judith Williams',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Return the info dict for the product page at *url*.

        Uses the first entry of ``productContent.productContent.videos`` in
        the page's redux data for the sources and poster, and
        ``productDetail.product`` for the title and uploader.
        """
        product_id = self._match_id(url)
        redux = self._extract_redux_data(url, product_id)

        # Only the first listed video is considered.
        first_video = traverse_obj(redux, ('productContent', 'productContent', 'videos', 0))
        if not first_video:
            first_video = {}
        formats, subtitles = self._extract_formats_and_subtitles(
            first_video.get('sources'), product_id)

        product_path = ('productDetail', 'product')
        # Fall back to the id when the product has no short name.
        title = traverse_obj(redux, product_path + ('name', 'short'))
        if not title:
            title = product_id

        return {
            'id': product_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': first_video.get('poster'),
            'uploader': traverse_obj(redux, product_path + ('brand', 'brandName')),
        }