]>
Commit | Line | Data |
---|---|---|
52c2af82 TI |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | clean_html, | |
6 | determine_ext, | |
7 | get_element_by_class, | |
8 | int_or_none, | |
9 | merge_dicts, | |
10 | parse_bitrate, | |
11 | parse_resolution, | |
12 | remove_end, | |
13 | str_or_none, | |
14 | url_or_none, | |
15 | urlencode_postdata, | |
16 | ) | |
17 | ||
18 | ||
class IcareusIE(InfoExtractor):
    # Handles ".../player/...?assetId=<n>" and ".../player/...?eventId=<n>"
    # pages on the hosts listed in _DOMAINS (optionally prefixed with "www.").
    _DOMAINS = '|'.join(map(re.escape, (
        'asahitv.fi',
        'helsinkikanava.fi',
        'hyvinvointitv.fi',
        'inez.fi',
        'permanto.fi',
        'suite.icareus.com',
        'videos.minifiddlers.org',
    )))
    # "base_url" captures scheme + host; it is reused to build the metadata API URL.
    _VALID_URL = rf'(?P<base_url>https?://(?:www\.)?(?:{_DOMAINS}))/[^?#]+/player/[^?#]+\?(?:[^#]+&)?(?:assetId|eventId)=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.helsinkikanava.fi/fi_FI/web/helsinkikanava/player/vod?assetId=68021894',
        'md5': 'ca0b62ffc814a5411dfa6349cf5adb8a',
        'info_dict': {
            'id': '68021894',
            'ext': 'mp4',
            'title': 'Perheiden parhaaksi',
            'description': 'md5:295785ea408e5ac00708766465cc1325',
            'thumbnail': 'https://www.helsinkikanava.fi/image/image_gallery?img_id=68022501',
            'upload_date': '20200924',
            'timestamp': 1600938300,
        },
    }, {  # Recorded livestream
        'url': 'https://www.helsinkikanava.fi/fi/web/helsinkikanava/player/event/view?eventId=76241489',
        'md5': '014327e69dfa7b949fcc861f6d162d6d',
        'info_dict': {
            'id': '76258304',
            'ext': 'mp4',
            'title': 'Helsingin kaupungin ja HUSin tiedotustilaisuus koronaepidemiatilanteesta 24.11.2020',
            'description': 'md5:3129d041c6fbbcdc7fe68d9a938fef1c',
            'thumbnail': 'https://icareus-suite.secure2.footprint.net/image/image_gallery?img_id=76288630',
            'upload_date': '20201124',
            'timestamp': 1606206600,
        },
    }, {  # Non-m3u8 stream
        'url': 'https://suite.icareus.com/fi/web/westend-indians/player/vod?assetId=47567389',
        'md5': '72fc04ee971bbedc44405cdf16c990b6',
        'info_dict': {
            'id': '47567389',
            'ext': 'mp4',
            'title': 'Omatoiminen harjoittelu - Laukominen',
            'description': '',
            'thumbnail': 'https://suite.icareus.com/image/image_gallery?img_id=47568162',
            'upload_date': '20200319',
            'timestamp': 1584658080,
        },
    }, {
        'url': 'https://asahitv.fi/fi/web/asahi/player/vod?assetId=89415818',
        'only_matching': True,
    }, {
        'url': 'https://hyvinvointitv.fi/fi/web/hyvinvointitv/player/vod?assetId=89149730',
        'only_matching': True,
    }, {
        'url': 'https://inez.fi/fi/web/inez-media/player/vod?assetId=71328822',
        'only_matching': True,
    }, {
        'url': 'https://www.permanto.fi/fi/web/alfatv/player/vod?assetId=135497515',
        'only_matching': True,
    }, {
        'url': 'https://videos.minifiddlers.org/web/international-minifiddlers/player/vod?assetId=1982759',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a player page.

        The id matched from the URL is only used while downloading the page;
        the final video id is the ``itemId`` embedded in the page script (the
        recorded-livestream test shows the two can differ).

        Steps: read ids/token from the page, POST ``getAssetPlaybackUrls`` to
        the publishing service for stream URLs, then collect metadata from
        JSON-LD, the ``getAsset`` API, or the "future event" page elements,
        in that order of preference.
        """
        base_url, temp_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, temp_id)

        video_id = self._search_regex(r"_icareus\['itemId'\]\s*=\s*'(\d+)'", webpage, 'video_id')
        organization_id = self._search_regex(r"_icareus\['organizationId'\]\s*=\s*'(\d+)'", webpage, 'organization_id')

        assets = self._download_json(
            self._search_regex(r'var\s+publishingServiceURL\s*=\s*"(http[^"]+)";', webpage, 'api_base'),
            video_id, data=urlencode_postdata({
                'version': '03',
                'action': 'getAssetPlaybackUrls',
                'organizationId': organization_id,
                'assetId': video_id,
                'token': self._search_regex(r"_icareus\['token'\]\s*=\s*'([a-f0-9]+)'", webpage, 'icareus_token'),
            }))

        # Each subtitle entry is unpacked as a 3-item sequence; the language key
        # is the first word of the description with a trailing ':' removed.
        # Entries whose URL does not validate are skipped, matching how audio
        # and video URLs are filtered below, so no track ends up with url=None.
        subtitles = {}
        for _, sdesc, surl in assets.get('subtitles') or []:
            sub_url = url_or_none(surl)
            if not sub_url:
                continue
            subtitles[remove_end(sdesc.split(' ')[0], ':')] = [{'url': sub_url}]

        # Audio-only renditions; the bitrate is parsed from a "(<n> k)" marker in the name.
        formats = [{
            'format': item.get('name'),
            'format_id': 'audio',
            'vcodec': 'none',
            'url': url_or_none(item['url']),
            'tbr': int_or_none(self._search_regex(
                r'\((\d+)\s*k\)', item.get('name') or '', 'audio bitrate', default=None)),
        } for item in assets.get('audio_urls') or [] if url_or_none(item.get('url'))]

        for item in assets.get('urls') or []:
            video_url = url_or_none(item.get('url'))
            if video_url is None:
                continue
            ext = determine_ext(video_url)
            if ext == 'm3u8':
                # HLS manifests are expanded; a failing manifest is non-fatal.
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                # Direct file: bitrate and resolution are parsed from the format name.
                fmt = item.get('name')
                formats.append({
                    'url': video_url,
                    'format': fmt,
                    'tbr': parse_bitrate(fmt),
                    'format_id': str_or_none(item.get('id')),
                    **parse_resolution(fmt),
                })

        # Metadata fallbacks: JSON-LD first, then the getAsset API (needs a page
        # token), then the "unpublished/future event" elements on the page.
        info, token, live_title = self._search_json_ld(webpage, video_id, default={}), None, None
        if not info:
            token = self._search_regex(
                r'data\s*:\s*{action:"getAsset".*?token:\'([a-f0-9]+)\'}', webpage, 'token', default=None)
            if not token:
                live_title = get_element_by_class('unpublished-info-item future-event-title', webpage)

        if token:
            metadata = self._download_json(
                f'{base_url}/icareus-suite-api-portlet/publishing',
                video_id, fatal=False, data=urlencode_postdata({
                    'version': '03',
                    'action': 'getAsset',
                    'organizationId': organization_id,
                    'assetId': video_id,
                    'languageId': 'en_US',
                    'userId': '0',
                    'token': token,
                })) or {}
            info = {
                'title': metadata.get('name'),
                'description': metadata.get('description'),
                'timestamp': int_or_none(metadata.get('date'), scale=1000),
                'duration': int_or_none(metadata.get('duration')),
                'thumbnail': url_or_none(metadata.get('thumbnailMedium')),
            }
        elif live_title:  # Recorded livestream
            info = {
                'title': live_title,
                'description': get_element_by_class('unpublished-info-item future-event-description', webpage),
                'timestamp': int_or_none(self._search_regex(
                    r'var startEvent\s*=\s*(\d+);', webpage, 'uploadDate', fatal=False), scale=1000),
            }

        # Prefer thumbnails already present in info; otherwise build a single
        # entry from info's thumbnail or the one returned with the assets.
        thumbnails = info.get('thumbnails') or [{
            'url': url_or_none(info.get('thumbnail') or assets.get('thumbnail')),
        }]

        self._sort_formats(formats)
        # 'title' is a None placeholder here; the real title comes from `info`.
        return merge_dicts({
            'id': video_id,
            'title': None,
            'formats': formats,
            'subtitles': subtitles,
            'description': clean_html(info.get('description')),
            'thumbnails': thumbnails if thumbnails[0]['url'] else None,
        }, info)