10 from .common
import InfoExtractor
11 from ..compat
import compat_ord
, compat_urllib_parse_unquote
26 class CDAIE(InfoExtractor
):
# URL pattern: matches cda.pl/video/<id> pages (with or without "www.") and
# ebd.cda.pl/<digits>x<digits>/<id> URLs; the id is captured as group 'id'.
27 _VALID_URL
= r
'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
# Machine name for stored credentials
# (presumably the .netrc lookup key - confirm against InfoExtractor).
28 _NETRC_MACHINE
= 'cdapl'
# Base URL of the website, used by _web_extract to build page URLs.
30 _BASE_URL
= 'https://www.cda.pl'
# Base URL of the JSON API, used by _perform_login and _api_extract.
31 _BASE_API_URL
= 'https://api.cda.pl'
# Header entry for API requests; the enclosing dict is not visible in this
# view (presumably _API_HEADERS - confirm).
33 'Accept': 'application/vnd.cda.public+json',
35 # hardcoded in the app
# Static Basic credential sent as the Authorization header of the
# /oauth/token request in _perform_login.
36 _LOGIN_REQUEST_AUTH
= 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q'
# Cache section under which bearer tokens are stored and loaded per username.
37 _BEARER_CACHE
= 'cda-bearer'
# Test fixtures: each entry pairs a video URL with expected values (file md5,
# title, description hash, thumbnail pattern, uploader, rating type,
# timestamp and upload date).
# NOTE(review): the enclosing list/dict lines are not visible in this view -
# presumably these are entries of the extractor's test table; confirm.
40 'url': 'http://www.cda.pl/video/5749950c',
41 'md5': '6f844bf51b15f31fae165365707ae970',
46 'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
47 'description': 'md5:269ccd135d550da90d1662651fcb9772',
48 'thumbnail': r
're:^https?://.*\.jpg$',
49 'average_rating': float,
52 'upload_date': '20160221',
53 'timestamp': 1456078244,
56 'url': 'http://www.cda.pl/video/57413289',
57 'md5': 'a88828770a8310fc00be6c95faf7f4d5',
61 'title': 'Lądowanie na lotnisku na Maderze',
62 'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
63 'thumbnail': r
're:^https?://.*\.jpg$',
64 'uploader': 'crash404',
65 'average_rating': float,
68 'upload_date': '20160220',
69 'timestamp': 1455968218,
72 # Age-restricted with vfilm redirection
73 'url': 'https://www.cda.pl/video/8753244c4',
74 'md5': 'd8eeb83d63611289507010d3df3bb8b3',
78 'title': '[18+] Bez Filtra: Rezerwowe Psy czyli... najwulgarniejsza polska gra?',
79 'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
81 'uploader': 'arhn eu',
82 'thumbnail': r
're:^https?://.*\.jpg$',
85 'average_rating': float,
86 'timestamp': 1633888264,
87 'upload_date': '20211010',
90 # Age-restricted without vfilm redirection
91 'url': 'https://www.cda.pl/video/17028157b8',
92 'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
96 'title': 'STENDUPY MICHAŁ OGIŃSKI',
97 'description': 'md5:5851f3272bfc31f762d616040a1d609a',
99 'uploader': 'oginski',
100 'thumbnail': r
're:^https?://.*\.jpg$',
103 'average_rating': float,
104 'timestamp': 1699705901,
105 'upload_date': '20231111',
# ebd.cda.pl URL form of the first fixture's video id; flagged
# 'only_matching' (presumably only the URL pattern is checked - confirm).
108 'url': 'http://ebd.cda.pl/0x0/5749950c',
109 'only_matching': True,
# Download `url` while submitting the age-confirmation form.
# Encodes a multipart body holding a single empty 'age_confirm' field and
# passes the matching Content-Type header to _download_webpage; extra
# positional arguments are forwarded unchanged.
# NOTE(review): the lines that attach the body and the remaining headers to
# the call are not visible in this view - confirm how `data` and **kwargs
# are passed.
112 def _download_age_confirm_page(self
, url
, video_id
, *args
, **kwargs
):
113 data
, content_type
= multipart_encode({'age_confirm': ''}
)
114 return self
._download
_webpage
(
115 url
, video_id
, *args
,
118 'Content-Type': content_type
,
# Log in to the CDA API and put a bearer token into _API_HEADERS.
# Steps visible in this view:
#   1. pick a random app version, Android version and phone model and use
#      them to build the 'User-Agent' entry of _API_HEADERS;
#   2. load a cached bearer for `username`; when its 'valid_until' is more
#      than 5 seconds in the future, use it for the 'Authorization' header;
#   3. derive a password digest, request {_BASE_API_URL}/oauth/token, cache
#      the returned token with its expiry, and set the 'Authorization' header.
# NOTE(review): several lines are missing from this view (the end of the
# cached-token branch and most of the token request payload) - confirm that
# the cached branch returns early.
121 def _perform_login(self
, username
, password
):
122 app_version
= random
.choice((
123 '1.2.88 build 15306',
124 '1.2.174 build 18469',
126 android_version
= random
.randrange(8, 14)
127 phone_model
= random
.choice((
128 # x-kom.pl top selling Android smartphones, as of 2022-12-26
129 # https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android
131 'Motorola edge 20 5G',
132 'Motorola edge 30 neo 5G',
134 'OnePlus Nord 2T 5G',
135 'Samsung Galaxy A32 SM‑A325F',
136 'Samsung Galaxy M13',
137 'Samsung Galaxy S20 FE 5G',
139 'Xiaomi POCO M4 Pro',
142 'Xiaomi Redmi 9C NFC',
143 'Xiaomi Redmi Note 10 Pro',
144 'Xiaomi Redmi Note 11 Pro',
145 'Xiaomi Redmi Note 11',
146 'Xiaomi Redmi Note 11S 5G',
147 'Xiaomi Redmi Note 11S',
152 self
._API
_HEADERS
['User-Agent'] = f
'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'
154 cached_bearer
= self
.cache
.load(self
._BEARER
_CACHE
, username
) or {}
155 if cached_bearer
.get('valid_until', 0) > dt
.datetime
.now().timestamp() + 5:
156 self
._API
_HEADERS
['Authorization'] = f
'Bearer {cached_bearer["token"]}'
# Password digest: the hex string of md5(password) is HMAC-SHA256'd with a
# fixed key, url-safe base64 encoded, and stripped of '=' padding.
159 password_hash
= base64
.urlsafe_b64encode(hmac
.new(
160 b
's01m1Oer5IANoyBXQETzSOLWXgWs01m1Oer5bMg5xrTMMxRZ9Pi4fIPeFgIVRZ9PeXL8mPfXQETZGUAN5StRZ9P',
161 ''.join(f
'{bytes((bt & 255, )).hex():0>2}'
162 for bt
in hashlib
.md5(password
.encode()).digest()).encode(),
163 hashlib
.sha256
).digest()).decode().replace('=', '')
165 token_res
= self
._download
_json
(
166 f
'{self._BASE_API_URL}/oauth/token', None, 'Logging in', data
=b
'',
167 headers
={**self._API_HEADERS, 'Authorization': self._LOGIN_REQUEST_AUTH}
,
169 'grant_type': 'password',
171 'password': password_hash
,
# Cache the token under the username together with an absolute expiry time
# ('expires_in' added to the current timestamp).
173 self
.cache
.store(self
._BEARER
_CACHE
, username
, {
174 'token': token_res
['access_token'],
175 'valid_until': token_res
['expires_in'] + dt
.datetime
.now().timestamp(),
177 self
._API
_HEADERS
['Authorization'] = f
'Bearer {token_res["access_token"]}'
179 def _real_extract(self
, url
):
180 video_id
= self
._match
_id
(url
)
182 if 'Authorization' in self
._API
_HEADERS
:
183 return self
._api
_extract
(video_id
)
185 return self
._web
_extract
(video_id
)
# Extract video metadata and formats through the JSON API.
# Downloads {_BASE_API_URL}/video/<video_id> with _API_HEADERS and reads the
# 'video' object of the response. One format entry is built for every item of
# meta['qualities'] that has a 'file'; its height is parsed from the quality
# name with the last character dropped. Raises ExtractorError (expected=True)
# when the video is premium, not premium_free, and no formats were found.
# An uploader login of 'anonim' is reported as None; age_limit is 18 when
# 'for_adults' is set, else 0.
# NOTE(review): the opening lines of the formats list and of the returned
# dict are not visible in this view.
187 def _api_extract(self
, video_id
):
188 meta
= self
._download
_json
(
189 f
'{self._BASE_API_URL}/video/{video_id}', video_id
, headers
=self
._API
_HEADERS
)['video']
191 uploader
= traverse_obj(meta
, 'author', 'login')
194 'url': quality
['file'],
195 'format': quality
.get('title'),
196 'resolution': quality
.get('name'),
197 'height': try_call(lambda: int(quality
['name'][:-1])),
198 'filesize': quality
.get('length'),
199 } for quality
in meta
['qualities'] if quality
.get('file')]
201 if meta
.get('premium') and not meta
.get('premium_free') and not formats
:
202 raise ExtractorError(
203 'Video requires CDA Premium - subscription needed', expected
=True)
207 'title': meta
.get('title'),
208 'description': meta
.get('description'),
209 'uploader': None if uploader
== 'anonim' else uploader
,
210 'average_rating': float_or_none(meta
.get('rating')),
211 'thumbnail': meta
.get('thumb'),
213 'duration': meta
.get('duration'),
214 'age_limit': 18 if meta
.get('for_adults') else 0,
215 'view_count': meta
.get('views'),
# Extract video metadata and formats by scraping the website.
# Sets the 'cda.player' cookie to 'html5' for cda.pl and downloads
# {_BASE_URL}/video/<video_id>/vfilm, then:
#   - raises login-required when the page carries the premium-only notice;
#   - raises geo-restricted when the page matches the
#     "unavailable in your country" pattern;
#   - when an age_confirm button is present, re-requests the final URL
#     (urlh.url) through _download_age_confirm_page and sets age_limit to 18;
#   - reads uploader and rating by regex, title/description/thumbnail from
#     OpenGraph tags, and formats from the page's player_data JSON, both for
#     the default page and for every quality-button link;
#   - returns the collected info merged with the page's JSON-LD data.
# NOTE(review): many interior lines (including the header of the nested
# decrypt helper) are not visible in this view.
218 def _web_extract(self
, video_id
):
219 self
._set
_cookie
('cda.pl', 'cda.player', 'html5')
220 webpage
, urlh
= self
._download
_webpage
_handle
(
221 f
'{self._BASE_URL}/video/{video_id}/vfilm', video_id
)
# Premium-only notice shown by the site.
223 if 'Ten film jest dostępny dla użytkowników premium' in webpage
:
224 self
.raise_login_required('This video is only available for premium users')
226 if re
.search(r
'niedostępn[ey] w(?: |\s+)Twoim kraju\s*<', webpage
):
227 self
.raise_geo_restricted()
# Age gate: detected by a <button> whose name contains "age_confirm".
229 need_confirm_age
= False
230 if self
._html
_search
_regex
(r
'(<button[^>]+name="[^"]*age_confirm[^"]*")',
231 webpage
, 'birthday validate form', default
=None):
232 webpage
= self
._download
_age
_confirm
_page
(
233 urlh
.url
, video_id
, note
='Confirming age')
234 need_confirm_age
= True
238 uploader
= self
._search
_regex
(r
'''(?x)
239 <(span|meta)[^>]+itemprop=(["\'])author\
2[^
>]*>
240 (?
:<\
1[^
>]*>[^
<]*</\
1>|
(?
!</\
1>)(?
:.|
\n))*?
241 <(span|meta
)[^
>]+itemprop
=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
242 ''', webpage, 'uploader', default=None, group='uploader')
243 average_rating = self._search_regex(
244 (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\
1[^
>]*>(?P
<rating_value
>[0-9.]+)',
245 r'<span
[^
>]+\bclass
=["\']rating["\'][^
>]*>(?P
<rating_value
>[0-9.]+)'), webpage, 'rating
', fatal=False,
246 group='rating_value
')
250 'title
': self._og_search_title(webpage),
251 'description
': self._og_search_description(webpage),
252 'uploader
': uploader,
253 'average_rating
': float_or_none(average_rating),
254 'thumbnail
': self._og_search_thumbnail(webpage),
257 'age_limit
': 18 if need_confirm_age else 0,
260 info = self._search_json_ld(webpage, video_id, default={})
262 # Source: https://www.cda.pl/js/player.js?t=1606154898
264 for p in ('_XDDD
', '_CDA
', '_ADC
', '_CXD
', '_QWE
', '_Q5
', '_IKSDE
'):
266 a = compat_urllib_parse_unquote(a)
270 b.append(chr(33 + (f + 14) % 94) if 33 <= f <= 126 else chr(f))
272 a = a.replace('.cda
.mp4
', '')
273 for p in ('.2cda
.pl
', '.3cda
.pl
'):
274 a = a.replace(p, '.cda
.pl
')
276 a = a.replace('/upstream
', '.mp4
/upstream
')
277 return 'https
://' + a
278 return 'https
://' + a + '.mp4
'
280 def extract_format(page, version):
281 json_str = self._html_search_regex(
282 r'player_data
=(\\?
["\'])(?P<player_data>.+?)\1', page,
283 '%s player_json' % version, fatal=False, group='player_data')
286 player_data = self._parse_json(
287 json_str, '%s player_data' % version, fatal=False)
290 video = player_data.get('video')
291 if not video or 'file' not in video:
292 self.report_warning('Unable to extract %s version information' % version)
294 if video['file'].startswith('uggc'):
295 video['file'] = codecs.decode(video['file'], 'rot_13')
296 if video['file'].endswith('adc.mp4'):
297 video['file'] = video['file'].replace('adc.mp4', '.mp4')
298 elif not video['file'].startswith('http'):
299 video['file'] = decrypt_file(video['file'])
300 video_quality = video.get('quality')
301 qualities = video.get('qualities', {})
302 video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality)
303 info_dict['formats'].append({
304 'url': video['file'],
305 'format_id': video_quality,
306 'height': int_or_none(video_quality[:-1]),
308 for quality, cda_quality in qualities.items():
309 if quality == video_quality:
311 data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
312 'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]}
313 data = json.dumps(data).encode('utf-8')
314 video_url = self._download_json(
315 f'https://www.cda.pl/video/{video_id}', video_id, headers={
316 'Content-Type': 'application/json',
317 'X-Requested-With': 'XMLHttpRequest'
318 }, data=data, note=f'Fetching {quality} url',
319 errnote=f'Failed to fetch {quality} url', fatal=False)
320 if try_get(video_url, lambda x: x['result']['status']) == 'ok':
321 video_url = try_get(video_url, lambda x: x['result']['resp'])
322 info_dict['formats'].append({
324 'format_id': quality,
325 'height': int_or_none(quality[:-1])
328 if not info_dict['duration']:
329 info_dict['duration'] = parse_duration(video.get('duration'))
331 extract_format(webpage, 'default')
333 for href, resolution in re.findall(
334 r'<a[^>]+data-quality="[^
"]+"[^
>]+href
="([^"]+)"[^>]+class="quality
-btn
"[^>]*>([0-9]+p)',
337 handler = self._download_age_confirm_page
339 handler = self._download_webpage
342 urljoin(self._BASE_URL, href), video_id,
343 'Downloading %s version information' % resolution, fatal=False)
345 # Manually report warning because empty page is returned when
346 # invalid version is requested.
347 self.report_warning('Unable to download %s version information' % resolution)
350 extract_format(webpage, resolution)
352 return merge_dicts(info_dict, info)