11 from .common
import InfoExtractor
12 from ..compat
import compat_ord
27 class CDAIE(InfoExtractor
):
# URL pattern: matches cda.pl/video/<id> pages (with or without "www.") and
# ebd.cda.pl/<digits>x<digits>/<id> embeds; <id> is lowercase alphanumeric.
28 _VALID_URL
= r
'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
# NOTE(review): presumably the .netrc machine name used to look up
# credentials (inferred from the attribute name) - confirm.
29 _NETRC_MACHINE
= 'cdapl'
# Base URL of the website; _web_extract builds page URLs from it.
31 _BASE_URL
= 'https://www.cda.pl'
# Base URL of the JSON API; used for /oauth/token and /video/<id> requests.
32 _BASE_API_URL
= 'https://api.cda.pl'
# Media type requested from the API.
34 'Accept': 'application/vnd.cda.public+json',
36 # hardcoded in the app
# Authorization value sent only with the /oauth/token login request.
37 _LOGIN_REQUEST_AUTH
= 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q'
# Cache section under which bearer tokens are stored, keyed by username.
38 _BEARER_CACHE
= 'cda-bearer'
# Sample URLs with their expected metadata.
# NOTE(review): presumably the extractor's download-test fixtures - confirm.
41 'url': 'http://www.cda.pl/video/5749950c',
42 'md5': '6f844bf51b15f31fae165365707ae970',
47 'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
48 'description': 'md5:269ccd135d550da90d1662651fcb9772',
49 'thumbnail': r
're:^https?://.*\.jpg$',
50 'average_rating': float,
53 'upload_date': '20160221',
54 'timestamp': 1456078244,
57 'url': 'http://www.cda.pl/video/57413289',
58 'md5': 'a88828770a8310fc00be6c95faf7f4d5',
62 'title': 'Lądowanie na lotnisku na Maderze',
63 'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
64 'thumbnail': r
're:^https?://.*\.jpg$',
65 'uploader': 'crash404',
66 'average_rating': float,
69 'upload_date': '20160220',
70 'timestamp': 1455968218,
73 # Age-restricted with vfilm redirection
74 'url': 'https://www.cda.pl/video/8753244c4',
75 'md5': 'd8eeb83d63611289507010d3df3bb8b3',
79 'title': '[18+] Bez Filtra: Rezerwowe Psy czyli... najwulgarniejsza polska gra?',
80 'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
82 'uploader': 'arhn eu',
83 'thumbnail': r
're:^https?://.*\.jpg$',
86 'average_rating': float,
87 'timestamp': 1633888264,
88 'upload_date': '20211010',
91 # Age-restricted without vfilm redirection
92 'url': 'https://www.cda.pl/video/17028157b8',
93 'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
97 'title': 'STENDUPY MICHAŁ OGIŃSKI',
98 'description': 'md5:5851f3272bfc31f762d616040a1d609a',
100 'uploader': 'oginski',
101 'thumbnail': r
're:^https?://.*\.jpg$',
104 'average_rating': float,
105 'timestamp': 1699705901,
106 'upload_date': '20231111',
109 'url': 'http://ebd.cda.pl/0x0/5749950c',
110 'only_matching': True,
# Download `url` with a multipart-encoded form containing an empty
# `age_confirm` field, and return the result of `_download_webpage`.
# `video_id` and any extra positional arguments are forwarded to
# `_download_webpage`; the multipart Content-Type is sent as a request header.
# NOTE(review): presumably the encoded `data` is sent as the request body and
# **kwargs are forwarded as well - confirm against the full call.
113 def _download_age_confirm_page(self
, url
, video_id
, *args
, **kwargs
):
114 data
, content_type
= multipart_encode({'age_confirm': ''}
)
115 return self
._download
_webpage
(
116 url
, video_id
, *args
,
119 'Content-Type': content_type
,
# Log in to the CDA API with `username`/`password` and install a bearer token
# in self._API_HEADERS.
#
# Steps visible here:
# - Pick a random app version, Android version (8 to 13) and phone model, and
#   store the resulting 'User-Agent' in self._API_HEADERS.
# - Load the cached bearer for `username`; if its 'valid_until' is more than
#   5 seconds in the future, use its token for the 'Authorization' header.
#   NOTE(review): presumably the method returns at that point - confirm.
# - Derive the password hash: the MD5 digest of the password rendered as
#   lowercase hex is the HMAC-SHA256 message (fixed hard-coded key); the MAC
#   is URL-safe base64 encoded with '=' padding removed.
# - Request {_BASE_API_URL}/oauth/token, authorized with _LOGIN_REQUEST_AUTH,
#   using grant type 'password'.
# - Cache the returned access token with its expiry
#   (now + 'expires_in') and set 'Authorization: Bearer <token>'.
122 def _perform_login(self
, username
, password
):
123 app_version
= random
.choice((
124 '1.2.88 build 15306',
125 '1.2.174 build 18469',
127 android_version
= random
.randrange(8, 14)
128 phone_model
= random
.choice((
129 # x-kom.pl top selling Android smartphones, as of 2022-12-26
130 # https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android
132 'Motorola edge 20 5G',
133 'Motorola edge 30 neo 5G',
135 'OnePlus Nord 2T 5G',
136 'Samsung Galaxy A32 SM‑A325F',
137 'Samsung Galaxy M13',
138 'Samsung Galaxy S20 FE 5G',
140 'Xiaomi POCO M4 Pro',
143 'Xiaomi Redmi 9C NFC',
144 'Xiaomi Redmi Note 10 Pro',
145 'Xiaomi Redmi Note 11 Pro',
146 'Xiaomi Redmi Note 11',
147 'Xiaomi Redmi Note 11S 5G',
148 'Xiaomi Redmi Note 11S',
153 self
._API
_HEADERS
['User-Agent'] = f
'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'
155 cached_bearer
= self
.cache
.load(self
._BEARER
_CACHE
, username
) or {}
156 if cached_bearer
.get('valid_until', 0) > dt
.datetime
.now().timestamp() + 5:
157 self
._API
_HEADERS
['Authorization'] = f
'Bearer {cached_bearer["token"]}'
160 password_hash
= base64
.urlsafe_b64encode(hmac
.new(
161 b
's01m1Oer5IANoyBXQETzSOLWXgWs01m1Oer5bMg5xrTMMxRZ9Pi4fIPeFgIVRZ9PeXL8mPfXQETZGUAN5StRZ9P',
162 ''.join(f
'{bytes((bt & 255, )).hex():0>2}'
163 for bt
in hashlib
.md5(password
.encode()).digest()).encode(),
164 hashlib
.sha256
).digest()).decode().replace('=', '')
166 token_res
= self
._download
_json
(
167 f
'{self._BASE_API_URL}/oauth/token', None, 'Logging in', data
=b
'',
168 headers
={**self._API_HEADERS, 'Authorization': self._LOGIN_REQUEST_AUTH}
,
170 'grant_type': 'password',
172 'password': password_hash
,
174 self
.cache
.store(self
._BEARER
_CACHE
, username
, {
175 'token': token_res
['access_token'],
176 'valid_until': token_res
['expires_in'] + dt
.datetime
.now().timestamp(),
178 self
._API
_HEADERS
['Authorization'] = f
'Bearer {token_res["access_token"]}'
def _real_extract(self, url):
    """Extract the video behind ``url``, choosing the API or the web path.

    The video id is taken from ``url`` with ``_match_id``. When
    ``_API_HEADERS`` holds an ``'Authorization'`` entry the id is handed to
    ``_api_extract``; otherwise it is handed to ``_web_extract``. The chosen
    extractor's result is returned unchanged.
    """
    video_id = self._match_id(url)
    # An 'Authorization' header means a bearer token is available for the API.
    extractor = self._api_extract if 'Authorization' in self._API_HEADERS else self._web_extract
    return extractor(video_id)
# Extract video information through the JSON API.
#
# Fetches {_BASE_API_URL}/video/<video_id> with self._API_HEADERS and reads
# the 'video' object of the response. One format entry is built for every
# item of meta['qualities'] that has a 'file'; 'height' is the quality name
# without its last character parsed as an int (via try_call, so a failed
# parse does not raise here).
# Raises ExtractorError (expected=True) when the video is premium, not
# 'premium_free', and no formats were found.
# An uploader login of 'anonim' is reported as None; 'age_limit' is 18 when
# 'for_adults' is truthy and 0 otherwise.
188 def _api_extract(self
, video_id
):
189 meta
= self
._download
_json
(
190 f
'{self._BASE_API_URL}/video/{video_id}', video_id
, headers
=self
._API
_HEADERS
)['video']
192 uploader
= traverse_obj(meta
, 'author', 'login')
195 'url': quality
['file'],
196 'format': quality
.get('title'),
197 'resolution': quality
.get('name'),
198 'height': try_call(lambda: int(quality
['name'][:-1])),
199 'filesize': quality
.get('length'),
200 } for quality
in meta
['qualities'] if quality
.get('file')]
202 if meta
.get('premium') and not meta
.get('premium_free') and not formats
:
203 raise ExtractorError(
204 'Video requires CDA Premium - subscription needed', expected
=True)
208 'title': meta
.get('title'),
209 'description': meta
.get('description'),
210 'uploader': None if uploader
== 'anonim' else uploader
,
211 'average_rating': float_or_none(meta
.get('rating')),
212 'thumbnail': meta
.get('thumb'),
214 'duration': meta
.get('duration'),
215 'age_limit': 18 if meta
.get('for_adults') else 0,
216 'view_count': meta
.get('views'),
# Extract video information by scraping the website.
#
# Flow visible here:
# - Set the 'cda.player' cookie to 'html5' for cda.pl and download
#   {_BASE_URL}/video/<video_id>/vfilm.
# - Call raise_login_required when the page carries the premium-only notice,
#   and raise_geo_restricted when it carries the "unavailable in your
#   country" notice.
# - If an age_confirm button is present, re-download the final URL through
#   _download_age_confirm_page and remember that confirmation was needed
#   ('age_limit' becomes 18 instead of 0).
# - Read uploader and rating with regexes, title/description/thumbnail from
#   OpenGraph tags, and extra metadata from JSON-LD.
# - extract_format() parses the page's player_data JSON, decodes the file
#   URL (rot13 for values starting with 'uggc', otherwise the decrypt helper
#   for values that do not start with 'http'), appends a format, then asks
#   the 'videoGetLink' JSON-RPC endpoint for the other listed qualities.
# - Each quality-button link found on the page is processed with
#   extract_format() as well.
#   NOTE(review): presumably the age-confirm downloader is used for these
#   pages when confirmation was needed - confirm.
# - Return merge_dicts(info_dict, info).
219 def _web_extract(self
, video_id
):
220 self
._set
_cookie
('cda.pl', 'cda.player', 'html5')
221 webpage
, urlh
= self
._download
_webpage
_handle
(
222 f
'{self._BASE_URL}/video/{video_id}/vfilm', video_id
)
224 if 'Ten film jest dostępny dla użytkowników premium' in webpage
:
225 self
.raise_login_required('This video is only available for premium users')
227 if re
.search(r
'niedostępn[ey] w(?: |\s+)Twoim kraju\s*<', webpage
):
228 self
.raise_geo_restricted()
230 need_confirm_age
= False
231 if self
._html
_search
_regex
(r
'(<button[^>]+name="[^"]*age_confirm[^"]*")',
232 webpage
, 'birthday validate form', default
=None):
233 webpage
= self
._download
_age
_confirm
_page
(
234 urlh
.url
, video_id
, note
='Confirming age')
235 need_confirm_age
= True
239 uploader
= self
._search
_regex
(r
'''(?x)
240 <(span|meta)[^>]+itemprop=(["\'])author\
2[^
>]*>
241 (?
:<\
1[^
>]*>[^
<]*</\
1>|
(?
!</\
1>)(?
:.|
\n))*?
242 <(span|meta
)[^
>]+itemprop
=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
243 ''', webpage, 'uploader', default=None, group='uploader')
244 average_rating = self._search_regex(
245 (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\
1[^
>]*>(?P
<rating_value
>[0-9.]+)',
246 r'<span
[^
>]+\bclass
=["\']rating["\'][^
>]*>(?P
<rating_value
>[0-9.]+)'), webpage, 'rating
', fatal=False,
247 group='rating_value
')
251 'title
': self._og_search_title(webpage),
252 'description
': self._og_search_description(webpage),
253 'uploader
': uploader,
254 'average_rating
': float_or_none(average_rating),
255 'thumbnail
': self._og_search_thumbnail(webpage),
258 'age_limit
': 18 if need_confirm_age else 0,
261 info = self._search_json_ld(webpage, video_id, default={})
263 # Source: https://www.cda.pl/js/player.js?t=1606154898
265 for p in ('_XDDD
', '_CDA
', '_ADC
', '_CXD
', '_QWE
', '_Q5
', '_IKSDE
'):
267 a = urllib.parse.unquote(a)
271 b.append(chr(33 + (f + 14) % 94) if 33 <= f <= 126 else chr(f))
273 a = a.replace('.cda
.mp4
', '')
274 for p in ('.2cda
.pl
', '.3cda
.pl
'):
275 a = a.replace(p, '.cda
.pl
')
277 a = a.replace('/upstream
', '.mp4
/upstream
')
278 return 'https
://' + a
279 return 'https
://' + a + '.mp4
'
281 def extract_format(page, version):
282 json_str = self._html_search_regex(
283 r'player_data
=(\\?
["\'])(?P<player_data>.+?)\1', page,
284 f'{version} player_json', fatal=False, group='player_data')
287 player_data = self._parse_json(
288 json_str, f'{version} player_data', fatal=False)
291 video = player_data.get('video')
292 if not video or 'file' not in video:
293 self.report_warning(f'Unable to extract {version} version information')
295 if video['file'].startswith('uggc'):
296 video['file'] = codecs.decode(video['file'], 'rot_13')
297 if video['file'].endswith('adc.mp4'):
298 video['file'] = video['file'].replace('adc.mp4', '.mp4')
299 elif not video['file'].startswith('http'):
300 video['file'] = decrypt_file(video['file'])
301 video_quality = video.get('quality')
302 qualities = video.get('qualities', {})
303 video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality)
304 info_dict['formats'].append({
305 'url': video['file'],
306 'format_id': video_quality,
307 'height': int_or_none(video_quality[:-1]),
309 for quality, cda_quality in qualities.items():
310 if quality == video_quality:
312 data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
313 'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]}
314 data = json.dumps(data).encode()
315 video_url = self._download_json(
316 f'https://www.cda.pl/video/{video_id}', video_id, headers={
317 'Content-Type': 'application/json',
318 'X-Requested-With': 'XMLHttpRequest',
319 }, data=data, note=f'Fetching {quality} url',
320 errnote=f'Failed to fetch {quality} url', fatal=False)
321 if try_get(video_url, lambda x: x['result']['status']) == 'ok':
322 video_url = try_get(video_url, lambda x: x['result']['resp'])
323 info_dict['formats'].append({
325 'format_id': quality,
326 'height': int_or_none(quality[:-1]),
329 if not info_dict['duration']:
330 info_dict['duration'] = parse_duration(video.get('duration'))
332 extract_format(webpage, 'default')
334 for href, resolution in re.findall(
335 r'<a[^>]+data-quality="[^
"]+"[^
>]+href
="([^"]+)"[^>]+class="quality
-btn
"[^>]*>([0-9]+p)',
338 handler = self._download_age_confirm_page
340 handler = self._download_webpage
343 urljoin(self._BASE_URL, href), video_id,
344 f'Downloading {resolution} version information', fatal=False)
346 # Manually report warning because empty page is returned when
347 # invalid version is requested.
348 self.report_warning(f'Unable to download {resolution} version information')
351 extract_format(webpage, resolution)
353 return merge_dicts(info_dict, info)