10 from .common
import InfoExtractor
11 from ..compat
import compat_ord
, compat_urllib_parse_unquote
27 class CDAIE(InfoExtractor
):
# Matches regular video pages (cda.pl/video/<id>, with or without "www.")
# and embedded-player URLs (ebd.cda.pl/<digits>x<digits>/<id>).
# The video id (lowercase letters and digits) is captured as group "id".
_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
# NOTE(review): presumably the machine name looked up in .netrc for
# credentials - confirm against the InfoExtractor base class.
_NETRC_MACHINE = 'cdapl'

# Base URL of the website; video pages and quality links are built on it.
_BASE_URL = 'https://www.cda.pl'
# Base URL of the JSON API (the oauth/token and video/<id> endpoints).
_BASE_API_URL = 'https://api.cda.pl'
34 'Accept': 'application/vnd.cda.public+json',
# hardcoded in the app
# Sent as the Authorization header of the oauth/token login request.
_LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q'
# Cache section under which bearer tokens are loaded/stored, keyed by username.
_BEARER_CACHE = 'cda-bearer'
41 'url': 'http://www.cda.pl/video/5749950c',
42 'md5': '6f844bf51b15f31fae165365707ae970',
47 'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
48 'description': 'md5:269ccd135d550da90d1662651fcb9772',
49 'thumbnail': r
're:^https?://.*\.jpg$',
50 'average_rating': float,
53 'upload_date': '20160221',
54 'timestamp': 1456078244,
57 'url': 'http://www.cda.pl/video/57413289',
58 'md5': 'a88828770a8310fc00be6c95faf7f4d5',
62 'title': 'Lądowanie na lotnisku na Maderze',
63 'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
64 'thumbnail': r
're:^https?://.*\.jpg$',
65 'uploader': 'crash404',
67 'average_rating': float,
73 'url': 'http://www.cda.pl/video/1273454c4',
77 'title': 'Bronson (2008) napisy HD 1080p',
78 'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
80 'uploader': 'boniek61',
81 'thumbnail': r
're:^https?://.*\.jpg$',
85 'average_rating': float,
88 'url': 'http://ebd.cda.pl/0x0/5749950c',
89 'only_matching': True,
92 def _download_age_confirm_page(self
, url
, video_id
, *args
, **kwargs
):
93 form_data
= random_birthday('rok', 'miesiac', 'dzien')
94 form_data
.update({'return': url, 'module': 'video', 'module_id': video_id}
)
95 data
, content_type
= multipart_encode(form_data
)
96 return self
._download
_webpage
(
97 urljoin(url
, '/a/validatebirth'), video_id
, *args
,
100 'Content-Type': content_type
,
103 def _perform_login(self
, username
, password
):
104 app_version
= random
.choice((
105 '1.2.88 build 15306',
106 '1.2.174 build 18469',
108 android_version
= random
.randrange(8, 14)
109 phone_model
= random
.choice((
110 # x-kom.pl top selling Android smartphones, as of 2022-12-26
111 # https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android
113 'Motorola edge 20 5G',
114 'Motorola edge 30 neo 5G',
116 'OnePlus Nord 2T 5G',
117 'Samsung Galaxy A32 SM‑A325F',
118 'Samsung Galaxy M13',
119 'Samsung Galaxy S20 FE 5G',
121 'Xiaomi POCO M4 Pro',
124 'Xiaomi Redmi 9C NFC',
125 'Xiaomi Redmi Note 10 Pro',
126 'Xiaomi Redmi Note 11 Pro',
127 'Xiaomi Redmi Note 11',
128 'Xiaomi Redmi Note 11S 5G',
129 'Xiaomi Redmi Note 11S',
134 self
._API
_HEADERS
['User-Agent'] = f
'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'
136 cached_bearer
= self
.cache
.load(self
._BEARER
_CACHE
, username
) or {}
137 if cached_bearer
.get('valid_until', 0) > datetime
.datetime
.now().timestamp() + 5:
138 self
._API
_HEADERS
['Authorization'] = f
'Bearer {cached_bearer["token"]}'
141 password_hash
= base64
.urlsafe_b64encode(hmac
.new(
142 b
's01m1Oer5IANoyBXQETzSOLWXgWs01m1Oer5bMg5xrTMMxRZ9Pi4fIPeFgIVRZ9PeXL8mPfXQETZGUAN5StRZ9P',
143 ''.join(f
'{bytes((bt & 255, )).hex():0>2}'
144 for bt
in hashlib
.md5(password
.encode()).digest()).encode(),
145 hashlib
.sha256
).digest()).decode().replace('=', '')
147 token_res
= self
._download
_json
(
148 f
'{self._BASE_API_URL}/oauth/token', None, 'Logging in', data
=b
'',
149 headers
={**self._API_HEADERS, 'Authorization': self._LOGIN_REQUEST_AUTH}
,
151 'grant_type': 'password',
153 'password': password_hash
,
155 self
.cache
.store(self
._BEARER
_CACHE
, username
, {
156 'token': token_res
['access_token'],
157 'valid_until': token_res
['expires_in'] + datetime
.datetime
.now().timestamp(),
159 self
._API
_HEADERS
['Authorization'] = f
'Bearer {token_res["access_token"]}'
def _real_extract(self, url):
    """Extract the video referenced by *url*.

    The video id is taken from the URL via ``self._match_id``. When an
    ``Authorization`` entry is present in ``self._API_HEADERS`` (the login
    code stores a bearer token there), extraction goes through the JSON API;
    otherwise the public web page is used.

    Returns whatever ``_api_extract`` / ``_web_extract`` returns.
    """
    video_id = self._match_id(url)

    # A bearer token in the shared API headers means login succeeded.
    if 'Authorization' in self._API_HEADERS:
        return self._api_extract(video_id)

    return self._web_extract(video_id, url)
169 def _api_extract(self
, video_id
):
170 meta
= self
._download
_json
(
171 f
'{self._BASE_API_URL}/video/{video_id}', video_id
, headers
=self
._API
_HEADERS
)['video']
173 uploader
= traverse_obj(meta
, 'author', 'login')
176 'url': quality
['file'],
177 'format': quality
.get('title'),
178 'resolution': quality
.get('name'),
179 'height': try_call(lambda: int(quality
['name'][:-1])),
180 'filesize': quality
.get('length'),
181 } for quality
in meta
['qualities'] if quality
.get('file')]
183 if meta
.get('premium') and not meta
.get('premium_free') and not formats
:
184 raise ExtractorError(
185 'Video requires CDA Premium - subscription needed', expected
=True)
189 'title': meta
.get('title'),
190 'description': meta
.get('description'),
191 'uploader': None if uploader
== 'anonim' else uploader
,
192 'average_rating': float_or_none(meta
.get('rating')),
193 'thumbnail': meta
.get('thumb'),
195 'duration': meta
.get('duration'),
196 'age_limit': 18 if meta
.get('for_adults') else 0,
197 'view_count': meta
.get('views'),
200 def _web_extract(self
, video_id
, url
):
201 self
._set
_cookie
('cda.pl', 'cda.player', 'html5')
202 webpage
= self
._download
_webpage
(
203 f
'{self._BASE_URL}/video/{video_id}/vfilm', video_id
)
205 if 'Ten film jest dostępny dla użytkowników premium' in webpage
:
206 self
.raise_login_required('This video is only available for premium users')
208 if re
.search(r
'niedostępn[ey] w(?: |\s+)Twoim kraju\s*<', webpage
):
209 self
.raise_geo_restricted()
211 need_confirm_age
= False
212 if self
._html
_search
_regex
(r
'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
213 webpage
, 'birthday validate form', default
=None):
214 webpage
= self
._download
_age
_confirm
_page
(
215 url
, video_id
, note
='Confirming age')
216 need_confirm_age
= True
220 uploader
= self
._search
_regex
(r
'''(?x)
221 <(span|meta)[^>]+itemprop=(["\'])author\
2[^
>]*>
222 (?
:<\
1[^
>]*>[^
<]*</\
1>|
(?
!</\
1>)(?
:.|
\n))*?
223 <(span|meta
)[^
>]+itemprop
=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
224 ''', webpage, 'uploader', default=None, group='uploader')
225 view_count = self._search_regex(
226 r'Odsłony:(?:\s| )*([0-9]+)', webpage,
227 'view_count', default=None)
228 average_rating = self._search_regex(
229 (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\
1[^
>]*>(?P
<rating_value
>[0-9.]+)',
230 r'<span
[^
>]+\bclass
=["\']rating["\'][^
>]*>(?P
<rating_value
>[0-9.]+)'), webpage, 'rating
', fatal=False,
231 group='rating_value
')
235 'title
': self._og_search_title(webpage),
236 'description
': self._og_search_description(webpage),
237 'uploader
': uploader,
238 'view_count
': int_or_none(view_count),
239 'average_rating
': float_or_none(average_rating),
240 'thumbnail
': self._og_search_thumbnail(webpage),
243 'age_limit
': 18 if need_confirm_age else 0,
246 info = self._search_json_ld(webpage, video_id, default={})
248 # Source: https://www.cda.pl/js/player.js?t=1606154898
250 for p in ('_XDDD
', '_CDA
', '_ADC
', '_CXD
', '_QWE
', '_Q5
', '_IKSDE
'):
252 a = compat_urllib_parse_unquote(a)
256 b.append(chr(33 + (f + 14) % 94) if 33 <= f <= 126 else chr(f))
258 a = a.replace('.cda
.mp4
', '')
259 for p in ('.2cda
.pl
', '.3cda
.pl
'):
260 a = a.replace(p, '.cda
.pl
')
262 a = a.replace('/upstream
', '.mp4
/upstream
')
263 return 'https
://' + a
264 return 'https
://' + a + '.mp4
'
266 def extract_format(page, version):
267 json_str = self._html_search_regex(
268 r'player_data
=(\\?
["\'])(?P<player_data>.+?)\1', page,
269 '%s player_json' % version, fatal=False, group='player_data')
272 player_data = self._parse_json(
273 json_str, '%s player_data' % version, fatal=False)
276 video = player_data.get('video')
277 if not video or 'file' not in video:
278 self.report_warning('Unable to extract %s version information' % version)
280 if video['file'].startswith('uggc'):
281 video['file'] = codecs.decode(video['file'], 'rot_13')
282 if video['file'].endswith('adc.mp4'):
283 video['file'] = video['file'].replace('adc.mp4', '.mp4')
284 elif not video['file'].startswith('http'):
285 video['file'] = decrypt_file(video['file'])
286 video_quality = video.get('quality')
287 qualities = video.get('qualities', {})
288 video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality)
289 info_dict['formats'].append({
290 'url': video['file'],
291 'format_id': video_quality,
292 'height': int_or_none(video_quality[:-1]),
294 for quality, cda_quality in qualities.items():
295 if quality == video_quality:
297 data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
298 'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]}
299 data = json.dumps(data).encode('utf-8')
300 video_url = self._download_json(
301 f'https://www.cda.pl/video/{video_id}', video_id, headers={
302 'Content-Type': 'application/json',
303 'X-Requested-With': 'XMLHttpRequest'
304 }, data=data, note=f'Fetching {quality} url',
305 errnote=f'Failed to fetch {quality} url', fatal=False)
306 if try_get(video_url, lambda x: x['result']['status']) == 'ok':
307 video_url = try_get(video_url, lambda x: x['result']['resp'])
308 info_dict['formats'].append({
310 'format_id': quality,
311 'height': int_or_none(quality[:-1])
314 if not info_dict['duration']:
315 info_dict['duration'] = parse_duration(video.get('duration'))
317 extract_format(webpage, 'default')
319 for href, resolution in re.findall(
320 r'<a[^>]+data-quality="[^
"]+"[^
>]+href
="([^"]+)"[^>]+class="quality
-btn
"[^>]*>([0-9]+p)',
323 handler = self._download_age_confirm_page
325 handler = self._download_webpage
328 urljoin(self._BASE_URL, href), video_id,
329 'Downloading %s version information' % resolution, fatal=False)
331 # Manually report warning because empty page is returned when
332 # invalid version is requested.
333 self.report_warning('Unable to download %s version information' % resolution)
336 extract_format(webpage, resolution)
338 return merge_dicts(info_dict, info)