]>
Commit | Line | Data |
---|---|---|
34f00179 | 1 | import base64 |
fdeea726 | 2 | import codecs |
c305a25c | 3 | import datetime as dt |
34f00179 | 4 | import hashlib |
5 | import hmac | |
05664a2f | 6 | import json |
da8d2de2 | 7 | import random |
ac668111 | 8 | import re |
add96eb9 | 9 | import urllib.parse |
8b0d7a66 KM |
10 | |
11 | from .common import InfoExtractor | |
add96eb9 | 12 | from ..compat import compat_ord |
8b0d7a66 | 13 | from ..utils import ( |
8b0d7a66 | 14 | ExtractorError, |
577281b0 KM |
15 | float_or_none, |
16 | int_or_none, | |
38d70284 | 17 | merge_dicts, |
0c265486 | 18 | multipart_encode, |
577281b0 | 19 | parse_duration, |
34f00179 | 20 | traverse_obj, |
21 | try_call, | |
05664a2f | 22 | try_get, |
ac668111 | 23 | urljoin, |
8b0d7a66 KM |
24 | ) |
25 | ||
26 | ||
class CDAIE(InfoExtractor):
    """Extractor for single videos on cda.pl (regular and embed URLs).

    Without credentials the public web player pages are scraped; after a
    successful login the JSON API at ``_BASE_API_URL`` is used instead.
    """

    _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
    _NETRC_MACHINE = 'cdapl'

    _BASE_URL = 'https://www.cda.pl'
    _BASE_API_URL = 'https://api.cda.pl'
    # NOTE: this dict lives on the class and is mutated in place by
    # _perform_login(), so the added headers are visible to every instance.
    _API_HEADERS = {
        'Accept': 'application/vnd.cda.public+json',
    }
    # hardcoded in the app
    _LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q'
    _BEARER_CACHE = 'cda-bearer'

    _TESTS = [{
        'url': 'http://www.cda.pl/video/5749950c',
        'md5': '6f844bf51b15f31fae165365707ae970',
        'info_dict': {
            'id': '5749950c',
            'ext': 'mp4',
            'height': 720,
            'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
            'description': 'md5:269ccd135d550da90d1662651fcb9772',
            'thumbnail': r're:^https?://.*\.jpg$',
            'average_rating': float,
            'duration': 39,
            'age_limit': 0,
            'upload_date': '20160221',
            'timestamp': 1456078244,
        },
    }, {
        'url': 'http://www.cda.pl/video/57413289',
        'md5': 'a88828770a8310fc00be6c95faf7f4d5',
        'info_dict': {
            'id': '57413289',
            'ext': 'mp4',
            'title': 'Lądowanie na lotnisku na Maderze',
            'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'crash404',
            'average_rating': float,
            'duration': 137,
            'age_limit': 0,
            'upload_date': '20160220',
            'timestamp': 1455968218,
        },
    }, {
        # Age-restricted with vfilm redirection
        'url': 'https://www.cda.pl/video/8753244c4',
        'md5': 'd8eeb83d63611289507010d3df3bb8b3',
        'info_dict': {
            'id': '8753244c4',
            'ext': 'mp4',
            'title': '[18+] Bez Filtra: Rezerwowe Psy czyli... najwulgarniejsza polska gra?',
            'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
            'height': 1080,
            'uploader': 'arhn eu',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 991,
            'age_limit': 18,
            'average_rating': float,
            'timestamp': 1633888264,
            'upload_date': '20211010',
        },
    }, {
        # Age-restricted without vfilm redirection
        'url': 'https://www.cda.pl/video/17028157b8',
        'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
        'info_dict': {
            'id': '17028157b8',
            'ext': 'mp4',
            'title': 'STENDUPY MICHAŁ OGIŃSKI',
            'description': 'md5:5851f3272bfc31f762d616040a1d609a',
            'height': 480,
            'uploader': 'oginski',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 18855,
            'age_limit': 18,
            'average_rating': float,
            'timestamp': 1699705901,
            'upload_date': '20231111',
        },
    }, {
        'url': 'http://ebd.cda.pl/0x0/5749950c',
        'only_matching': True,
    }]

    def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
        """Fetch `url` while submitting the site's age-confirmation form.

        The form is sent as a multipart POST with an empty `age_confirm`
        field and `url` as the Referer. Extra positional and keyword
        arguments are forwarded to `_download_webpage`, whose result is
        returned unchanged.
        """
        data, content_type = multipart_encode({'age_confirm': ''})
        return self._download_webpage(
            url, video_id, *args,
            data=data, headers={
                'Referer': url,
                'Content-Type': content_type,
            }, **kwargs)

    def _perform_login(self, username, password):
        """Obtain an API bearer token and store it in `_API_HEADERS`.

        A randomized mobile-app User-Agent is always set. A cached token
        that stays valid for more than 5 more seconds is reused; otherwise
        a new one is requested from the OAuth endpoint and cached under
        `username`.
        """
        app_version = random.choice((
            '1.2.88 build 15306',
            '1.2.174 build 18469',
        ))
        android_version = random.randrange(8, 14)
        phone_model = random.choice((
            # x-kom.pl top selling Android smartphones, as of 2022-12-26
            # https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android
            'ASUS ZenFone 8',
            'Motorola edge 20 5G',
            'Motorola edge 30 neo 5G',
            'Motorola moto g22',
            'OnePlus Nord 2T 5G',
            'Samsung Galaxy A32 SM‑A325F',
            'Samsung Galaxy M13',
            'Samsung Galaxy S20 FE 5G',
            'Xiaomi 11T',
            'Xiaomi POCO M4 Pro',
            'Xiaomi Redmi 10',
            'Xiaomi Redmi 10C',
            'Xiaomi Redmi 9C NFC',
            'Xiaomi Redmi Note 10 Pro',
            'Xiaomi Redmi Note 11 Pro',
            'Xiaomi Redmi Note 11',
            'Xiaomi Redmi Note 11S 5G',
            'Xiaomi Redmi Note 11S',
            'realme 10',
            'realme 9 Pro+',
            'vivo Y33s',
        ))
        self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'

        cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
        # An entry without a token is treated as stale instead of raising KeyError
        if (cached_bearer.get('token')
                and cached_bearer.get('valid_until', 0) > dt.datetime.now().timestamp() + 5):
            self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}'
            return

        # The HMAC message is the lowercase hex MD5 of the password;
        # the result is sent as unpadded URL-safe base64.
        password_md5_hex = hashlib.md5(password.encode()).hexdigest().encode()
        password_hash = base64.urlsafe_b64encode(hmac.new(
            b's01m1Oer5IANoyBXQETzSOLWXgWs01m1Oer5bMg5xrTMMxRZ9Pi4fIPeFgIVRZ9PeXL8mPfXQETZGUAN5StRZ9P',
            password_md5_hex, hashlib.sha256).digest()).decode().replace('=', '')

        token_res = self._download_json(
            f'{self._BASE_API_URL}/oauth/token', None, 'Logging in', data=b'',
            headers={**self._API_HEADERS, 'Authorization': self._LOGIN_REQUEST_AUTH},
            query={
                'grant_type': 'password',
                'login': username,
                'password': password_hash,
            })
        self.cache.store(self._BEARER_CACHE, username, {
            'token': token_res['access_token'],
            'valid_until': token_res['expires_in'] + dt.datetime.now().timestamp(),
        })
        self._API_HEADERS['Authorization'] = f'Bearer {token_res["access_token"]}'

    def _real_extract(self, url):
        """Extract via the API when a bearer token is set, else via the website."""
        video_id = self._match_id(url)

        if 'Authorization' in self._API_HEADERS:
            return self._api_extract(video_id)
        return self._web_extract(video_id)

    def _api_extract(self, video_id):
        """Build the info dict from the API's `video` object.

        Raises ExtractorError (expected) when the video is premium-only,
        not marked `premium_free`, and no playable quality was returned.
        """
        meta = self._download_json(
            f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video']

        # One nested path. Separate positional arguments would be treated
        # as alternative paths and yield the whole `author` object.
        uploader = traverse_obj(meta, ('author', 'login'))

        formats = [{
            'url': quality['file'],
            'format': quality.get('title'),
            'resolution': quality.get('name'),
            'height': try_call(lambda: int(quality['name'][:-1])),
            'filesize': quality.get('length'),
        } for quality in (meta.get('qualities') or []) if quality.get('file')]

        if meta.get('premium') and not meta.get('premium_free') and not formats:
            raise ExtractorError(
                'Video requires CDA Premium - subscription needed', expected=True)

        return {
            'id': video_id,
            'title': meta.get('title'),
            'description': meta.get('description'),
            'uploader': None if uploader == 'anonim' else uploader,
            'average_rating': float_or_none(meta.get('rating')),
            'thumbnail': meta.get('thumb'),
            'formats': formats,
            'duration': meta.get('duration'),
            'age_limit': 18 if meta.get('for_adults') else 0,
            'view_count': meta.get('views'),
        }

    def _web_extract(self, video_id):
        """Build the info dict by scraping the public player pages.

        The default page yields one format plus a JSON-RPC lookup for each
        other listed quality; every per-resolution page linked from it is
        then processed the same way. Metadata scraped here takes
        precedence over the JSON-LD data it is merged with.
        """
        self._set_cookie('cda.pl', 'cda.player', 'html5')
        webpage, urlh = self._download_webpage_handle(
            f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)

        if 'Ten film jest dostępny dla użytkowników premium' in webpage:
            self.raise_login_required('This video is only available for premium users')

        # Presumably undecoded HTML carries the non-breaking space as the
        # `&nbsp;` entity; literal whitespace is already covered by `\s+`.
        if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage):
            self.raise_geo_restricted()

        need_confirm_age = False
        if self._html_search_regex(r'(<button[^>]+name="[^"]*age_confirm[^"]*")',
                                   webpage, 'birthday validate form', default=None):
            webpage = self._download_age_confirm_page(
                urlh.url, video_id, note='Confirming age')
            need_confirm_age = True

        formats = []

        uploader = self._search_regex(r'''(?x)
            <(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
            (?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
            <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
        ''', webpage, 'uploader', default=None, group='uploader')
        average_rating = self._search_regex(
            (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
             r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
            group='rating_value')

        info_dict = {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'uploader': uploader,
            'average_rating': float_or_none(average_rating),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
            'duration': None,
            'age_limit': 18 if need_confirm_age else 0,
        }

        info = self._search_json_ld(webpage, video_id, default={})

        # Source: https://www.cda.pl/js/player.js?t=1606154898
        def decrypt_file(a):
            """Decode an obfuscated `file` value into an https media URL."""
            for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
                a = a.replace(p, '')
            a = urllib.parse.unquote(a)
            b = []
            for c in a:
                f = compat_ord(c)
                # Rotate code points 33..126 by 47 places within that range
                b.append(chr(33 + (f + 14) % 94) if 33 <= f <= 126 else chr(f))
            a = ''.join(b)
            a = a.replace('.cda.mp4', '')
            for p in ('.2cda.pl', '.3cda.pl'):
                a = a.replace(p, '.cda.pl')
            if '/upstream' in a:
                a = a.replace('/upstream', '.mp4/upstream')
                return 'https://' + a
            return 'https://' + a + '.mp4'

        def height_of(label):
            """Return the height for a label like '720p', or None if it is not a string."""
            return int_or_none(label[:-1]) if isinstance(label, str) else None

        def extract_format(page, version):
            """Append the formats described by `page`'s player_data to info_dict."""
            json_str = self._html_search_regex(
                r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
                f'{version} player_json', fatal=False, group='player_data')
            if not json_str:
                return
            player_data = self._parse_json(
                json_str, f'{version} player_data', fatal=False)
            if not player_data:
                return
            video = player_data.get('video')
            if not video or 'file' not in video:
                self.report_warning(f'Unable to extract {version} version information')
                return
            if video['file'].startswith('uggc'):
                # 'uggc' is 'http' under ROT13
                video['file'] = codecs.decode(video['file'], 'rot_13')
                if video['file'].endswith('adc.mp4'):
                    video['file'] = video['file'].replace('adc.mp4', '.mp4')
            elif not video['file'].startswith('http'):
                video['file'] = decrypt_file(video['file'])

            qualities = video.get('qualities')
            if not isinstance(qualities, dict):
                # Tolerate a missing, null or non-mapping value
                qualities = {}
            # Map the site's internal quality value back to its label
            # (a key of `qualities`) when one matches
            video_quality = video.get('quality')
            video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality)
            info_dict['formats'].append({
                'url': video['file'],
                'format_id': video_quality,
                'height': height_of(video_quality),
            })
            for quality, cda_quality in qualities.items():
                if quality == video_quality:
                    continue
                payload = json.dumps({
                    'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
                    'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}],
                }).encode()
                link_res = self._download_json(
                    f'{self._BASE_URL}/video/{video_id}', video_id, headers={
                        'Content-Type': 'application/json',
                        'X-Requested-With': 'XMLHttpRequest',
                    }, data=payload, note=f'Fetching {quality} url',
                    errnote=f'Failed to fetch {quality} url', fatal=False)
                if try_get(link_res, lambda x: x['result']['status']) != 'ok':
                    continue
                video_url = try_get(link_res, lambda x: x['result']['resp'])
                if not video_url:
                    # Never emit a format without a URL
                    continue
                info_dict['formats'].append({
                    'url': video_url,
                    'format_id': quality,
                    'height': height_of(quality),
                })

            if not info_dict['duration']:
                info_dict['duration'] = parse_duration(video.get('duration'))

        extract_format(webpage, 'default')

        # Age-restricted videos need the confirmation form on every page
        handler = self._download_age_confirm_page if need_confirm_age else self._download_webpage
        for href, resolution in re.findall(
                r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
                webpage):
            version_page = handler(
                urljoin(self._BASE_URL, href), video_id,
                f'Downloading {resolution} version information', fatal=False)
            if not version_page:
                # Manually report warning because empty page is returned when
                # invalid version is requested.
                self.report_warning(f'Unable to download {resolution} version information')
                continue

            extract_format(version_page, resolution)

        return merge_dicts(info_dict, info)