]>
Commit | Line | Data |
---|---|---|
ae43a4b9 | 1 | from .common import InfoExtractor |
2 | from ..utils import ( | |
3 | ExtractorError, | |
4 | traverse_obj, | |
5 | unified_timestamp, | |
6 | ) | |
7 | ||
8 | ||
class HSEShowBaseInfoExtractor(InfoExtractor):
    """Shared helpers for the hse.de extractors defined in this module."""

    # NOTE(review): consumed by InfoExtractor, which is not visible here;
    # presumably the country codes used for geo-restriction handling - confirm.
    _GEO_COUNTRIES = ['DE']

    def _extract_redux_data(self, url, video_id):
        """Download *url* and return the parsed ``window.__REDUX_DATA__`` object.

        The page is fetched, the object literal assigned to
        ``window.__REDUX_DATA__`` is located with a regex, and the match is
        parsed as JSON.
        """
        webpage = self._download_webpage(url, video_id)
        redux = self._html_search_regex(
            r'window\.__REDUX_DATA__\s*=\s*({.*});?', webpage, 'redux data')
        # Newlines are removed from the matched text before it is parsed.
        return self._parse_json(redux.replace('\n', ''), video_id)

    def _extract_formats_and_subtitles(self, sources, video_id):
        """Build formats and subtitles from a list of source descriptors.

        Only entries whose ``mimetype`` is ``application/x-mpegURL`` are used;
        each one is expanded through ``_extract_m3u8_formats_and_subtitles``.
        Entries that lack a ``mimetype`` or a ``url`` are skipped instead of
        aborting the whole extraction with a ``KeyError``.

        Returns:
            A ``(formats, subtitles)`` tuple.

        Raises:
            ExtractorError: if *sources* is empty or ``None``.
        """
        if not sources:
            raise ExtractorError('No video found', expected=True, video_id=video_id)
        formats, subtitles = [], {}
        for src in sources:
            # Tolerant lookups: a malformed entry must not break the others.
            if src.get('mimetype') != 'application/x-mpegURL':
                continue
            m3u8_url = src.get('url')
            if not m3u8_url:
                continue
            fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, ext='mp4')
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        self._sort_formats(formats)
        return formats, subtitles
30 | ||
31 | ||
class HSEShowIE(HSEShowBaseInfoExtractor):
    """Extractor for hse.de TV show pages (``/dpl/c/tv-shows/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?hse\.de/dpl/c/tv-shows/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.hse.de/dpl/c/tv-shows/505350',
        'info_dict': {
            'id': '505350',
            'ext': 'mp4',
            'title': 'Pfeffinger Mode & Accessoires',
            'timestamp': 1638810000,
            'upload_date': '20211206',
            'channel': 'HSE24',
            'uploader': 'Arina Pirayesh'
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Return the info dict for the TV show page at *url*.

        Formats and subtitles come from the sources listed under
        ``tvShowPage.tvShowVideo``; the remaining metadata is read from
        ``tvShowPage.tvShow``. The title falls back to the video id.
        """
        video_id = self._match_id(url)
        json_data = self._extract_redux_data(url, video_id)
        formats, subtitles = self._extract_formats_and_subtitles(
            traverse_obj(json_data, ('tvShowPage', 'tvShowVideo', 'sources')), video_id)

        show = traverse_obj(json_data, ('tvShowPage', 'tvShow')) or {}
        return {
            'id': video_id,
            'title': show.get('title') or video_id,
            'formats': formats,
            # The timestamp string is assembled as "<date> <hour>:00".
            'timestamp': unified_timestamp(f'{show.get("date")} {show.get("hour")}:00'),
            # The poster is looked up next to the sources (under tvShowPage);
            # the previously used top-level path is kept as a fallback.
            'thumbnail': traverse_obj(
                json_data,
                ('tvShowPage', 'tvShowVideo', 'poster'),
                ('tvShowVideo', 'poster')),
            # Third argument is the field name used in messages, so pass a
            # descriptive label rather than the video id.
            'channel': self._search_regex(
                r'tvShow \| ([A-Z0-9]+)_', show.get('actionFieldText') or '', 'channel', fatal=False),
            'uploader': show.get('presenter'),
            'subtitles': subtitles,
        }
66 | ||
67 | ||
class HSEProductIE(HSEShowBaseInfoExtractor):
    """Extractor for hse.de product pages (``/dpl/p/product/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?hse\.de/dpl/p/product/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.hse.de/dpl/p/product/408630',
        'info_dict': {
            'id': '408630',
            'ext': 'mp4',
            'title': 'Hose im Ponte-Mix',
            'uploader': 'Judith Williams'
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Return the info dict for the product page at *url*.

        Only the first entry of ``productContent.productContent.videos`` is
        used; the title falls back to the product id when no short name is
        present.
        """
        product_id = self._match_id(url)
        redux = self._extract_redux_data(url, product_id)

        # First listed product video, or an empty dict when there is none.
        clip = traverse_obj(redux, ('productContent', 'productContent', 'videos', 0)) or {}
        formats, subtitles = self._extract_formats_and_subtitles(clip.get('sources'), product_id)

        short_name = traverse_obj(redux, ('productDetail', 'product', 'name', 'short'))
        brand_name = traverse_obj(redux, ('productDetail', 'product', 'brand', 'brandName'))

        info = {
            'id': product_id,
            'title': short_name or product_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': clip.get('poster'),
            'uploader': brand_name,
        }
        return info