]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | import time | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | determine_ext, | |
7 | js_to_json, | |
8 | urlencode_postdata, | |
9 | ExtractorError, | |
10 | parse_qs, | |
11 | traverse_obj | |
12 | ) | |
13 | ||
14 | ||
class IPrimaIE(InfoExtractor):
    """Extractor for iprima.cz pages whose host name does not start with ``cnn``.

    Playback information is requested from the play-backend API with an
    access token, so ``_real_initialize`` refuses to continue unless
    ``_perform_login`` has stored one.
    """

    _VALID_URL = r'https?://(?!cnn)(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _GEO_BYPASS = False
    _NETRC_MACHINE = 'iprima'
    _AUTH_ROOT = 'https://auth.iprima.cz'
    # Token sent as the ``X-OTT-Access-Token`` header; set by ``_perform_login``.
    access_token = None

    _TESTS = [{
        'url': 'https://prima.iprima.cz/particka/92-epizoda',
        'info_dict': {
            'id': 'p51388',
            'ext': 'mp4',
            'title': 'Partička (92)',
            'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
            'upload_date': '20201103',
            'timestamp': 1604437480,
        },
        'params': {
            'skip_download': True,  # m3u8 download
        },
    }, {
        'url': 'http://play.iprima.cz/particka/particka-92',
        'only_matching': True,
    }, {
        # geo restricted
        'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
        'only_matching': True,
    }, {
        'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
        'only_matching': True,
    }, {
        'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
        'only_matching': True,
    }, {
        'url': 'http://www.iprima.cz/filmy/desne-rande',
        'only_matching': True,
    }, {
        'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
        'only_matching': True,
    }, {
        'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
        'only_matching': True,
    }, {
        'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
        'only_matching': True,
    }, {
        'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
        'only_matching': True,
    }]

    def _perform_login(self, username, password):
        """Log in with *username*/*password* and store the access token.

        Does nothing when a token is already stored. Otherwise the login form
        is fetched and posted back with the credentials, a profile is selected
        if the site redirects to the profile selection page, and the ``code``
        query parameter of the final URL is exchanged for a token.

        Raises:
            ExtractorError: if no ``code`` is present after logging in, or the
                token response contains no ``access_token``.
        """
        if self.access_token:
            return

        login_page = self._download_webpage(
            f'{self._AUTH_ROOT}/oauth2/login', None, note='Downloading login page',
            errnote='Downloading login page failed')

        # Start from the form's hidden inputs and add the credentials to them
        login_form = self._hidden_inputs(login_page)

        login_form.update({
            '_email': username,
            '_password': password})

        profile_select_html, login_handle = self._download_webpage_handle(
            f'{self._AUTH_ROOT}/oauth2/login', None, data=urlencode_postdata(login_form),
            note='Logging in')

        # a profile may need to be selected first, even when there is only a single one
        if '/profile-select' in login_handle.geturl():
            profile_id = self._search_regex(
                r'data-identifier\s*=\s*["\']?(\w+)', profile_select_html, 'profile id')

            login_handle = self._request_webpage(
                f'{self._AUTH_ROOT}/user/profile-select-perform/{profile_id}', None,
                query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile')

        # The authorization code is read from the query string of the final URL
        code = traverse_obj(login_handle.geturl(), ({parse_qs}, 'code', 0))
        if not code:
            raise ExtractorError('Login failed', expected=True)

        token_request_data = {
            'scope': 'openid+email+profile+phone+address+offline_access',
            'client_id': 'prima_sso',
            'grant_type': 'authorization_code',
            'code': code,
            'redirect_uri': f'{self._AUTH_ROOT}/sso/auth-check'}

        token_data = self._download_json(
            f'{self._AUTH_ROOT}/oauth2/token', None,
            note='Downloading token', errnote='Downloading token failed',
            data=urlencode_postdata(token_request_data))

        self.access_token = token_data.get('access_token')
        if self.access_token is None:
            raise ExtractorError('Getting token failed', expected=True)

    def _real_initialize(self):
        """Require a stored access token before any extraction is attempted."""
        if not self.access_token:
            self.raise_login_required('Login is required to access any iPrima content', method='password')

    def _raise_access_error(self, error_code):
        """Report an access problem for the ``errorCode`` of a play response.

        ``PLAY_GEOIP_DENIED`` is reported as a geo restriction for CZ, any
        other non-``None`` code as forbidden stream access. ``None`` is a no-op.
        """
        if error_code == 'PLAY_GEOIP_DENIED':
            self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
        elif error_code is not None:
            self.raise_no_formats('Access to stream infos forbidden', expected=True)

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The product id is looked up in the page (inline script variables
        first, Nuxt data second), the play API is queried for manifests, and
        the resulting HLS/DASH formats are combined with JSON-LD and
        ``<meta>`` metadata from the page.
        """
        # Keep the URL slug separately: it is still needed for messages while
        # the real product id is unknown.
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        title = self._html_extract_title(webpage) or self._html_search_meta(
            ['og:title', 'twitter:title'],
            webpage, 'title', default=None)

        video_id = self._search_regex((
            r'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
            r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1',
        ), webpage, 'real id', group='id', default=None)

        if not video_id:
            # video_id is None in this branch, so the slug is used for notes/errors
            nuxt_data = self._search_nuxt_data(webpage, display_id, traverse='data')
            video_id = traverse_obj(
                nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False)

        if not video_id:
            self.raise_no_formats('Unable to extract video ID from webpage')

        metadata = self._download_json(
            f'https://api.play-backend.iprima.cz/api/v1//products/id-{video_id}/play',
            video_id, note='Getting manifest URLs', errnote='Failed to get manifest URLs',
            headers={'X-OTT-Access-Token': self.access_token},
            # a 403 body is still parsed so that its errorCode can be inspected
            expected_status=403)

        self._raise_access_error(metadata.get('errorCode'))

        stream_infos = metadata.get('streamInfos')
        formats = []
        if stream_infos is None:
            self.raise_no_formats('Reading stream infos failed', expected=True)
        else:
            for manifest in stream_infos:
                manifest_type = manifest.get('type')
                manifest_url = manifest.get('url')
                ext = determine_ext(manifest_url)
                # The declared type is checked first, the URL extension second
                if manifest_type == 'HLS' or ext == 'm3u8':
                    formats += self._extract_m3u8_formats(
                        manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id='hls', fatal=False)
                elif manifest_type == 'DASH' or ext == 'mpd':
                    formats += self._extract_mpd_formats(
                        manifest_url, video_id, mpd_id='dash', fatal=False)

        final_result = self._search_json_ld(webpage, video_id, default={})

        page_metadata = {
            'title': title,
            'thumbnail': self._html_search_meta(
                ['thumbnail', 'og:image', 'twitter:image'],
                webpage, 'thumbnail', default=None),
            'description': self._html_search_meta(
                ['description', 'og:description', 'twitter:description'],
                webpage, 'description', default=None),
        }
        # Page values take precedence over JSON-LD, but a lookup that found
        # nothing must not erase a value JSON-LD did provide.
        final_result.update({
            key: value for key, value in page_metadata.items() if value is not None})
        final_result.update({
            'id': video_id,
            'formats': formats})

        return final_result
182 | ||
183 | ||
class IPrimaCNNIE(InfoExtractor):
    """Extractor for pages on ``cnn.iprima.cz``.

    The product id found in the page is used to request the player init page
    on ``play.iprima.cz``, from which the stream sources are read.
    """

    _VALID_URL = r'https?://cnn\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _GEO_BYPASS = False

    _TESTS = [{
        'url': 'https://cnn.iprima.cz/porady/strunc/24072020-koronaviru-mam-plne-zuby-strasit-druhou-vlnou-je-absurdni-rika-senatorka-dernerova',
        'info_dict': {
            'id': 'p716177',
            'ext': 'mp4',
            'title': 'md5:277c6b1ed0577e51b40ddd35602ff43e',
        },
        'params': {
            'skip_download': 'm3u8'
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        Sources come from the ``tracks`` mapping of the player options; if
        that yields nothing, every ``src: '...'`` entry in the player page is
        tried instead. A geo restriction for CZ is reported when no format was
        found and the player page contains ``>GEO_IP_NOT_ALLOWED<``.
        """
        video_id = self._match_id(url)

        self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1')

        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(
            webpage, default=None) or self._search_regex(
            r'<h1>([^<]+)', webpage, 'title')

        # From here on video_id is the product id rather than the URL slug
        video_id = self._search_regex(
            (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
             r'data-product="([^"]+)">',
             r'id=["\']player-(p\d+)"',
             r'playerId\s*:\s*["\']player-(p\d+)',
             r'\bvideos\s*=\s*["\'](p\d+)'),
            webpage, 'real id')

        playerpage = self._download_webpage(
            'http://play.iprima.cz/prehravac/init',
            video_id, note='Downloading player', query={
                '_infuse': 1,
                '_ts': round(time.time()),
                'productId': video_id,
            }, headers={'Referer': url})

        formats = []

        def extract_formats(format_url, format_key=None, lang=None):
            """Append the HLS formats found at *format_url* to ``formats``.

            A source counts as HLS when *format_key* is ``'hls'`` or the URL
            extension is ``m3u8``. Formats lacking a ``language`` get *lang*
            when it is given.
            """
            ext = determine_ext(format_url)
            if not (format_key == 'hls' or ext == 'm3u8'):
                # DASH and unrecognised sources contribute nothing. An MPD
                # extraction call used to sit behind an unconditional return
                # and never ran; the reason for skipping DASH is not
                # documented - TODO confirm before enabling it.
                return
            new_formats = self._extract_m3u8_formats(
                format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False)
            if lang:
                for f in new_formats:
                    if not f.get('language'):
                        f['language'] = lang
            formats.extend(new_formats)

        # fatal=False: unparsable options are treated as "no options"
        options = self._parse_json(
            self._search_regex(
                r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
                playerpage, 'player options', default='{}'),
            video_id, transform_source=js_to_json, fatal=False)

        # The options come from a remote page, so every level is type-checked
        # instead of assuming the expected {key: [track, ...]} layout.
        tracks_by_key = options.get('tracks') if isinstance(options, dict) else None
        if isinstance(tracks_by_key, dict):
            for key, tracks in tracks_by_key.items():
                if not isinstance(tracks, list):
                    continue
                for track in tracks:
                    if not isinstance(track, dict):
                        continue
                    src = track.get('src')
                    if src:
                        extract_formats(src, key.lower(), track.get('lang'))

        if not formats:
            # Fallback: try every quoted src value present in the player page
            for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
                extract_formats(src)

        if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
            self.raise_geo_restricted(countries=['CZ'], metadata_available=True)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'formats': formats,
            'description': self._og_search_description(webpage, default=None),
        }