]>
Commit | Line | Data |
---|---|---|
7881a644 | 1 | import re |
f406c787 | 2 | import time |
7881a644 | 3 | |
4 | from .common import InfoExtractor | |
1cc79574 | 5 | from ..utils import ( |
e897bd82 | 6 | ExtractorError, |
369e7e3f S |
7 | determine_ext, |
8 | js_to_json, | |
9fddc12a | 9 | parse_qs, |
e897bd82 SS |
10 | traverse_obj, |
11 | urlencode_postdata, | |
82642235 | 12 | ) |
7881a644 | 13 | |
14 | ||
class IPrimaIE(InfoExtractor):
    # Extractor for pages on <subdomain>.iprima.cz hosts; hosts starting with
    # "cnn" are excluded by the negative lookahead in _VALID_URL.
    # An access token is mandatory: _real_initialize demands a login when none is set.
    _VALID_URL = r'https?://(?!cnn)(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _GEO_BYPASS = False
    _NETRC_MACHINE = 'iprima'
    _AUTH_ROOT = 'https://auth.iprima.cz'
    # Token stored by _perform_login and sent as X-OTT-Access-Token; None until then
    access_token = None

    _TESTS = [{
        'url': 'https://prima.iprima.cz/particka/92-epizoda',
        'info_dict': {
            'id': 'p51388',
            'ext': 'mp4',
            'title': 'Partička (92)',
            'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
            'upload_date': '20201103',
            'timestamp': 1604437480,
        },
        'params': {
            'skip_download': True,  # m3u8 download
        },
    }, {
        'url': 'http://play.iprima.cz/particka/particka-92',
        'only_matching': True,
    }, {
        # geo restricted
        'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
        'only_matching': True,
    }, {
        'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
        'only_matching': True,
    }, {
        'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
        'only_matching': True,
    }, {
        'url': 'http://www.iprima.cz/filmy/desne-rande',
        'only_matching': True,
    }, {
        'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
        'only_matching': True,
    }, {
        'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
        'only_matching': True,
    }, {
        'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
        'only_matching': True,
    }, {
        'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
        'only_matching': True,
    }]

    def _perform_login(self, username, password):
        """Log in against ``_AUTH_ROOT`` and store the resulting access token.

        Does nothing when ``self.access_token`` is already set.  Raises
        ExtractorError when no authorization code comes back from the login
        or when the token response carries no ``access_token``.
        """
        if self.access_token:
            return

        login_url = f'{self._AUTH_ROOT}/oauth2/login'

        login_page = self._download_webpage(
            login_url, None, note='Downloading login page',
            errnote='Downloading login page failed')

        # Start from the hidden form fields and add the credentials
        login_form = self._hidden_inputs(login_page)
        login_form.update({
            '_email': username,
            '_password': password})

        profile_select_html, login_handle = self._download_webpage_handle(
            login_url, None, data=urlencode_postdata(login_form),
            note='Logging in')

        # a profile may need to be selected first, even when there is only a single one
        if '/profile-select' in login_handle.url:
            profile_id = self._search_regex(
                r'data-identifier\s*=\s*["\']?(\w+)', profile_select_html, 'profile id')

            login_handle = self._request_webpage(
                f'{self._AUTH_ROOT}/user/profile-select-perform/{profile_id}', None,
                query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile')

        # The authorization code is read from the query string of the final URL
        code = traverse_obj(login_handle.url, ({parse_qs}, 'code', 0))
        if not code:
            raise ExtractorError('Login failed', expected=True)

        token_request_data = {
            'scope': 'openid+email+profile+phone+address+offline_access',
            'client_id': 'prima_sso',
            'grant_type': 'authorization_code',
            'code': code,
            'redirect_uri': f'{self._AUTH_ROOT}/sso/auth-check'}

        token_data = self._download_json(
            f'{self._AUTH_ROOT}/oauth2/token', None,
            note='Downloading token', errnote='Downloading token failed',
            data=urlencode_postdata(token_request_data))

        self.access_token = token_data.get('access_token')
        if self.access_token is None:
            raise ExtractorError('Getting token failed', expected=True)

    def _real_initialize(self):
        """Demand a password login unless an access token is already present."""
        if not self.access_token:
            self.raise_login_required('Login is required to access any iPrima content', method='password')

    def _raise_access_error(self, error_code):
        """Report the ``errorCode`` of a play response; no-op when it is None.

        'PLAY_GEOIP_DENIED' is reported as a geo restriction for CZ, any other
        code as a generic "forbidden" no-formats condition.
        """
        if error_code == 'PLAY_GEOIP_DENIED':
            self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
        elif error_code is not None:
            self.raise_no_formats('Access to stream infos forbidden', expected=True)

    def _real_extract(self, url):
        """Resolve the product id of the page at *url* and build its info dict."""
        # Keep the URL slug separately: it labels every request/lookup made
        # before (and while) the real product id is being searched for.
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        title = self._html_extract_title(webpage) or self._html_search_meta(
            ['og:title', 'twitter:title'],
            webpage, 'title', default=None)

        # 1st attempt: product id assigned in inline script
        video_id = self._search_regex((
            r'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
            r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1',
        ), webpage, 'real id', group='id', default=None)

        # 2nd attempt: nuxt data, looked up under content.additionals.videoPlayId
        if not video_id:
            nuxt_data = self._search_nuxt_data(webpage, display_id, traverse='data', fatal=False)
            video_id = traverse_obj(
                nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False)

        # 3rd attempt: first value in the __NUXT_DATA__ JSON array that is exactly "p<digits>"
        if not video_id:
            nuxt_data = self._search_json(
                r'<script[^>]+\bid=["\']__NUXT_DATA__["\'][^>]*>',
                webpage, 'nuxt data', display_id, end_pattern=r'</script>', contains_pattern=r'\[(?s:.+)\]')

            video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False)

        if not video_id:
            self.raise_no_formats('Unable to extract video ID from webpage')

        # HTTP 403 is accepted so that the JSON error body can be inspected below
        metadata = self._download_json(
            f'https://api.play-backend.iprima.cz/api/v1//products/id-{video_id}/play',
            video_id, note='Getting manifest URLs', errnote='Failed to get manifest URLs',
            headers={'X-OTT-Access-Token': self.access_token},
            expected_status=403)

        self._raise_access_error(metadata.get('errorCode'))

        stream_infos = metadata.get('streamInfos')
        formats = []
        if stream_infos is None:
            self.raise_no_formats('Reading stream infos failed', expected=True)

        for manifest in stream_infos or []:
            manifest_type = manifest.get('type')
            manifest_url = manifest.get('url')
            ext = determine_ext(manifest_url)
            # The declared type wins; the URL extension is the fallback signal
            if manifest_type == 'HLS' or ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif manifest_type == 'DASH' or ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    manifest_url, video_id, mpd_id='dash', fatal=False))

        # JSON-LD fields form the base; the explicit values below override them
        final_result = self._search_json_ld(webpage, video_id, default={})
        final_result.update({
            'id': video_id,
            'title': title,
            'thumbnail': self._html_search_meta(
                ['thumbnail', 'og:image', 'twitter:image'],
                webpage, 'thumbnail', default=None),
            'formats': formats,
            'description': self._html_search_meta(
                ['description', 'og:description', 'twitter:description'],
                webpage, 'description', default=None)})

        return final_result
189 | ||
190 | ||
class IPrimaCNNIE(InfoExtractor):
    # Extractor for pages on cnn.iprima.cz; formats are read from the
    # play.iprima.cz player init page rather than from the article itself.
    _VALID_URL = r'https?://cnn\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _GEO_BYPASS = False

    _TESTS = [{
        'url': 'https://cnn.iprima.cz/porady/strunc/24072020-koronaviru-mam-plne-zuby-strasit-druhou-vlnou-je-absurdni-rika-senatorka-dernerova',
        'info_dict': {
            'id': 'p716177',
            'ext': 'mp4',
            'title': 'md5:277c6b1ed0577e51b40ddd35602ff43e',
        },
        'params': {
            'skip_download': 'm3u8'
        }
    }]

    def _real_extract(self, url):
        """Find the product id on the page at *url* and collect its HLS formats."""
        video_id = self._match_id(url)

        self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1')

        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(
            webpage, default=None) or self._search_regex(
            r'<h1>([^<]+)', webpage, 'title')

        # From here on video_id is the product id found in the page, not the URL slug
        video_id = self._search_regex(
            (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
             r'data-product="([^"]+)">',
             r'id=["\']player-(p\d+)"',
             r'playerId\s*:\s*["\']player-(p\d+)',
             r'\bvideos\s*=\s*["\'](p\d+)'),
            webpage, 'real id')

        playerpage = self._download_webpage(
            'http://play.iprima.cz/prehravac/init',
            video_id, note='Downloading player', query={
                '_infuse': 1,
                '_ts': round(time.time()),
                'productId': video_id,
            }, headers={'Referer': url})

        formats = []

        def extract_formats(format_url, format_key=None, lang=None):
            """Append the formats found at *format_url* to the enclosing ``formats`` list."""
            ext = determine_ext(format_url)
            if not (format_key == 'hls' or ext == 'm3u8'):
                # Only HLS contributes formats.  DASH sources ('dash' key or
                # .mpd extension) are skipped: the previous code returned
                # before its MPD extraction call, so that call never ran and
                # has been removed as dead code.
                # NOTE(review): confirm whether DASH should be re-enabled.
                return
            new_formats = self._extract_m3u8_formats(
                format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False)
            if lang:
                # Fill in the track language only where the manifest gave none
                for f in new_formats:
                    if not f.get('language'):
                        f['language'] = lang
            formats.extend(new_formats)

        options = self._parse_json(
            self._search_regex(
                r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
                playerpage, 'player options', default='{}'),
            video_id, transform_source=js_to_json, fatal=False)
        if options:
            # `or {}` also covers an explicit `tracks: null` in the player options
            for key, tracks in (options.get('tracks') or {}).items():
                if not isinstance(tracks, list):
                    continue
                for track in tracks:
                    src = track.get('src')
                    if src:
                        extract_formats(src, key.lower(), track.get('lang'))

        # Fallback: scan the raw player page for quoted "src" values
        if not formats:
            for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
                extract_formats(src)

        if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
            self.raise_geo_restricted(countries=['CZ'], metadata_available=True)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'formats': formats,
            'description': self._og_search_description(webpage, default=None),
        }