[yt-dlp.git] / yt_dlp / extractor / cda.py

import base64
import codecs
import datetime as dt
import hashlib
import hmac
import json
import random
import re

from .common import InfoExtractor
from ..compat import compat_ord, compat_urllib_parse_unquote
from ..utils import (
    ExtractorError,
    float_or_none,
    int_or_none,
    merge_dicts,
    multipart_encode,
    parse_duration,
    traverse_obj,
    try_call,
    try_get,
    urljoin,
)


class CDAIE(InfoExtractor):
    # Matches regular video pages (cda.pl/video/<id>) as well as embed URLs
    # (ebd.cda.pl/<width>x<height>/<id>); the id is lowercase alphanumeric.
    _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
    # NOTE(review): presumably the machine name used for .netrc credential lookup - confirm
    _NETRC_MACHINE = 'cdapl'

    # Base URLs of the website (used by _web_extract) and of the JSON API
    # (used by _perform_login and _api_extract).
    _BASE_URL = 'https://www.cda.pl'
    _BASE_API_URL = 'https://api.cda.pl'
    # Headers sent with API requests. This dict lives on the class and
    # _perform_login() adds 'User-Agent' and 'Authorization' to it in place;
    # _real_extract() uses the presence of 'Authorization' to pick the API path.
    _API_HEADERS = {
        'Accept': 'application/vnd.cda.public+json',
    }
    # 'Authorization' value sent with the oauth/token request; hardcoded in the app
    _LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q'
    # Cache section under which bearer tokens are stored, keyed by username
    _BEARER_CACHE = 'cda-bearer'
    _TESTS = [{
        'url': 'http://www.cda.pl/video/5749950c',
        'md5': '6f844bf51b15f31fae165365707ae970',
        'info_dict': {
            'id': '5749950c',
            'ext': 'mp4',
            'height': 720,
            'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
            'description': 'md5:269ccd135d550da90d1662651fcb9772',
            'thumbnail': r're:^https?://.*\.jpg$',
            'average_rating': float,
            'duration': 39,
            'age_limit': 0,
            'upload_date': '20160221',
            'timestamp': 1456078244,
        }
    }, {
        'url': 'http://www.cda.pl/video/57413289',
        'md5': 'a88828770a8310fc00be6c95faf7f4d5',
        'info_dict': {
            'id': '57413289',
            'ext': 'mp4',
            'title': 'Lądowanie na lotnisku na Maderze',
            'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'crash404',
            'average_rating': float,
            'duration': 137,
            'age_limit': 0,
            'upload_date': '20160220',
            'timestamp': 1455968218,
        }
    }, {
        # Age-restricted with vfilm redirection
        'url': 'https://www.cda.pl/video/8753244c4',
        'md5': 'd8eeb83d63611289507010d3df3bb8b3',
        'info_dict': {
            'id': '8753244c4',
            'ext': 'mp4',
            'title': '[18+] Bez Filtra: Rezerwowe Psy czyli...  najwulgarniejsza polska gra?',
            'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
            'height': 1080,
            'uploader': 'arhn eu',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 991,
            'age_limit': 18,
            'average_rating': float,
            'timestamp': 1633888264,
            'upload_date': '20211010',
        }
    }, {
        # Age-restricted without vfilm redirection
        'url': 'https://www.cda.pl/video/17028157b8',
        'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
        'info_dict': {
            'id': '17028157b8',
            'ext': 'mp4',
            'title': 'STENDUPY MICHAŁ OGIŃSKI',
            'description': 'md5:5851f3272bfc31f762d616040a1d609a',
            'height': 480,
            'uploader': 'oginski',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 18855,
            'age_limit': 18,
            'average_rating': float,
            'timestamp': 1699705901,
            'upload_date': '20231111',
        }
    }, {
        'url': 'http://ebd.cda.pl/0x0/5749950c',
        'only_matching': True,
    }]

    def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
        """Download *url* while submitting the age-confirmation form field.

        A multipart-encoded body containing an empty ``age_confirm`` field is
        sent as request data, with *url* itself as the ``Referer``. Any extra
        positional and keyword arguments are forwarded unchanged to
        ``_download_webpage``, whose result is returned.
        """
        form_body, form_content_type = multipart_encode({'age_confirm': ''})
        request_headers = {
            'Referer': url,
            'Content-Type': form_content_type,
        }
        return self._download_webpage(
            url, video_id, *args,
            data=form_body, headers=request_headers, **kwargs)

    def _perform_login(self, username, password):
        """Obtain a bearer token for *username* and store it in the API headers.

        A randomised mobile-app ``User-Agent`` is put into ``_API_HEADERS``
        first. If the cache holds a token for *username* that stays valid for
        more than five more seconds it is reused; otherwise a new token is
        requested from the ``oauth/token`` endpoint, cached together with its
        expiry time, and installed as the ``Authorization`` header.
        """
        known_app_versions = (
            '1.2.88 build 15306',
            '1.2.174 build 18469',
        )
        known_phone_models = (
            # x-kom.pl top selling Android smartphones, as of 2022-12-26
            # https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android
            'ASUS ZenFone 8',
            'Motorola edge 20 5G',
            'Motorola edge 30 neo 5G',
            'Motorola moto g22',
            'OnePlus Nord 2T 5G',
            'Samsung Galaxy A32 SM‑A325F',
            'Samsung Galaxy M13',
            'Samsung Galaxy S20 FE 5G',
            'Xiaomi 11T',
            'Xiaomi POCO M4 Pro',
            'Xiaomi Redmi 10',
            'Xiaomi Redmi 10C',
            'Xiaomi Redmi 9C NFC',
            'Xiaomi Redmi Note 10 Pro',
            'Xiaomi Redmi Note 11 Pro',
            'Xiaomi Redmi Note 11',
            'Xiaomi Redmi Note 11S 5G',
            'Xiaomi Redmi Note 11S',
            'realme 10',
            'realme 9 Pro+',
            'vivo Y33s',
        )
        app_version = random.choice(known_app_versions)
        android_version = random.randrange(8, 14)
        phone_model = random.choice(known_phone_models)

        api_headers = self._API_HEADERS
        api_headers['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'

        # Reuse a cached token unless it expires within the next five seconds
        cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
        cached_expiry = cached_bearer.get('valid_until', 0)
        if cached_expiry > dt.datetime.now().timestamp() + 5:
            api_headers['Authorization'] = f'Bearer {cached_bearer["token"]}'
            return

        # The API takes the HMAC-SHA256 (fixed key) of the lowercase hex MD5 of
        # the password, URL-safe base64 encoded with the '=' padding removed.
        md5_hex = hashlib.md5(password.encode()).hexdigest()
        signature = hmac.new(
            b's01m1Oer5IANoyBXQETzSOLWXgWs01m1Oer5bMg5xrTMMxRZ9Pi4fIPeFgIVRZ9PeXL8mPfXQETZGUAN5StRZ9P',
            md5_hex.encode(), hashlib.sha256).digest()
        password_hash = base64.urlsafe_b64encode(signature).decode().replace('=', '')

        login_headers = {**api_headers, 'Authorization': self._LOGIN_REQUEST_AUTH}
        login_query = {
            'grant_type': 'password',
            'login': username,
            'password': password_hash,
        }
        token_res = self._download_json(
            f'{self._BASE_API_URL}/oauth/token', None, 'Logging in', data=b'',
            headers=login_headers, query=login_query)

        access_token = token_res['access_token']
        self.cache.store(self._BEARER_CACHE, username, {
            'token': access_token,
            'valid_until': token_res['expires_in'] + dt.datetime.now().timestamp(),
        })
        api_headers['Authorization'] = f'Bearer {access_token}'

    def _real_extract(self, url):
        """Extract the video behind *url*, via the API when logged in, else the website."""
        video_id = self._match_id(url)

        # 'Authorization' is only put into the API headers by _perform_login()
        logged_in = 'Authorization' in self._API_HEADERS
        extract = self._api_extract if logged_in else self._web_extract
        return extract(video_id)

    def _api_extract(self, video_id):
        """Extract video information through the JSON API.

        Requests ``/video/<video_id>`` with ``_API_HEADERS`` and builds the
        info dict from the ``video`` object of the response. One format is
        produced for every entry of ``qualities`` that carries a ``file`` URL.

        Raises ExtractorError (expected) when the video is flagged premium,
        is not ``premium_free`` and no format is available.
        """
        meta = self._download_json(
            f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video']

        # Every positional argument of traverse_obj() is an alternative path,
        # so the nested author -> login lookup has to be given as a tuple.
        # The plain 'author' / 'login' keys are kept as fallbacks, and
        # expected_type=str prevents a whole author object from being returned.
        uploader = traverse_obj(
            meta, ('author', 'login'), 'author', 'login', expected_type=str)

        # Tolerate a missing/null 'qualities' so the premium check below can
        # still report a meaningful error instead of a KeyError
        formats = [{
            'url': quality['file'],
            'format': quality.get('title'),
            'resolution': quality.get('name'),
            # names look like '720p'; drop the trailing character to get the height
            'height': try_call(lambda: int(quality['name'][:-1])),
            'filesize': quality.get('length'),
        } for quality in meta.get('qualities') or [] if quality.get('file')]

        if meta.get('premium') and not meta.get('premium_free') and not formats:
            raise ExtractorError(
                'Video requires CDA Premium - subscription needed', expected=True)

        return {
            'id': video_id,
            'title': meta.get('title'),
            'description': meta.get('description'),
            # 'anonim' is treated as "no uploader"
            'uploader': None if uploader == 'anonim' else uploader,
            'average_rating': float_or_none(meta.get('rating')),
            'thumbnail': meta.get('thumb'),
            'formats': formats,
            'duration': meta.get('duration'),
            'age_limit': 18 if meta.get('for_adults') else 0,
            'view_count': meta.get('views'),
        }

    def _web_extract(self, video_id):
        """Extract video information by scraping the website.

        Downloads the ``/vfilm`` page of the video, re-requests it with the
        age-confirmation form when the page shows an ``age_confirm`` button,
        reads the metadata from the HTML and collects formats from the
        ``player_data`` JSON of the default page and of every quality page
        linked from it.

        Calls ``raise_login_required`` for premium-only videos and
        ``raise_geo_restricted`` for videos unavailable in the user's country.
        Returns the scraped info dict merged with the JSON-LD data.
        """
        self._set_cookie('cda.pl', 'cda.player', 'html5')
        webpage, urlh = self._download_webpage_handle(
            f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)

        if 'Ten film jest dostępny dla użytkowników premium' in webpage:
            self.raise_login_required('This video is only available for premium users')

        if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage):
            self.raise_geo_restricted()

        need_confirm_age = False
        if self._html_search_regex(r'(<button[^>]+name="[^"]*age_confirm[^"]*")',
                                   webpage, 'birthday validate form', default=None):
            # urlh.url is the URL the first request ended up at
            webpage = self._download_age_confirm_page(
                urlh.url, video_id, note='Confirming age')
            need_confirm_age = True

        formats = []

        uploader = self._search_regex(r'''(?x)
            <(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
            (?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
            <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
        ''', webpage, 'uploader', default=None, group='uploader')
        average_rating = self._search_regex(
            (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
             r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
            group='rating_value')

        info_dict = {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'uploader': uploader,
            'average_rating': float_or_none(average_rating),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
            'duration': None,
            'age_limit': 18 if need_confirm_age else 0,
        }

        info = self._search_json_ld(webpage, video_id, default={})

        def parse_height(label):
            """Return the height encoded in a quality label such as '720p', else None."""
            # The label may be missing entirely; slicing None would raise
            return int_or_none(label[:-1]) if isinstance(label, str) else None

        # Source: https://www.cda.pl/js/player.js?t=1606154898
        def decrypt_file(a):
            """Turn an obfuscated ``file`` value into a https URL."""
            for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
                a = a.replace(p, '')
            a = compat_urllib_parse_unquote(a)
            # Rotate printable ASCII (33..126) by 47 positions (ROT47);
            # every other character is kept as it is
            b = []
            for c in a:
                f = compat_ord(c)
                b.append(chr(33 + (f + 14) % 94) if 33 <= f <= 126 else chr(f))
            a = ''.join(b)
            a = a.replace('.cda.mp4', '')
            for p in ('.2cda.pl', '.3cda.pl'):
                a = a.replace(p, '.cda.pl')
            if '/upstream' in a:
                a = a.replace('/upstream', '.mp4/upstream')
                return 'https://' + a
            return 'https://' + a + '.mp4'

        def extract_format(page, version):
            """Append the formats described by the ``player_data`` of *page* to ``formats``."""
            json_str = self._html_search_regex(
                r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
                f'{version} player_json', fatal=False, group='player_data')
            if not json_str:
                return
            player_data = self._parse_json(
                json_str, f'{version} player_data', fatal=False)
            if not player_data:
                return
            video = player_data.get('video')
            if not video or 'file' not in video:
                self.report_warning(f'Unable to extract {version} version information')
                return

            file_url = video['file']
            if file_url.startswith('uggc'):
                # 'uggc' is 'http' in ROT13
                file_url = codecs.decode(file_url, 'rot_13')
                if file_url.endswith('adc.mp4'):
                    file_url = file_url.replace('adc.mp4', '.mp4')
            elif not file_url.startswith('http'):
                file_url = decrypt_file(file_url)
            video['file'] = file_url

            # 'qualities' maps a label to the site's internal quality value;
            # anything that is not a mapping (missing, null, empty list) is
            # treated as "no further qualities"
            qualities = video.get('qualities')
            if not isinstance(qualities, dict):
                qualities = {}
            video_quality = video.get('quality')
            # Translate the internal value of the current quality to its label
            video_quality = next(
                (label for label, value in qualities.items() if value == video_quality),
                video_quality)
            formats.append({
                'url': file_url,
                'format_id': video_quality,
                'height': parse_height(video_quality),
            })

            for quality, cda_quality in qualities.items():
                if quality == video_quality:
                    continue
                payload = json.dumps({
                    'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
                    'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}],
                }).encode('utf-8')
                response = self._download_json(
                    f'{self._BASE_URL}/video/{video_id}', video_id, headers={
                        'Content-Type': 'application/json',
                        'X-Requested-With': 'XMLHttpRequest',
                    }, data=payload, note=f'Fetching {quality} url',
                    errnote=f'Failed to fetch {quality} url', fatal=False)
                if try_get(response, lambda x: x['result']['status']) != 'ok':
                    continue
                video_url = try_get(response, lambda x: x['result']['resp'])
                if not video_url:
                    # An 'ok' answer without a link cannot be turned into a format
                    continue
                formats.append({
                    'url': video_url,
                    'format_id': quality,
                    'height': parse_height(quality),
                })

            if not info_dict['duration']:
                info_dict['duration'] = parse_duration(video.get('duration'))

        extract_format(webpage, 'default')

        # Quality pages of an age-restricted video need the confirmation form too
        download_page = (
            self._download_age_confirm_page if need_confirm_age
            else self._download_webpage)

        for href, resolution in re.findall(
                r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
                webpage):
            version_page = download_page(
                urljoin(self._BASE_URL, href), video_id,
                f'Downloading {resolution} version information', fatal=False)
            if not version_page:
                # Manually report warning because empty page is returned when
                # invalid version is requested.
                self.report_warning(f'Unable to download {resolution} version information')
                continue

            extract_format(version_page, resolution)

        return merge_dicts(info_dict, info)
Commit	Line	Data
34f00179	1	import base64
fdeea726	2	import codecs
c305a25c	3	import datetime as dt
34f00179	4	import hashlib
34f00179	5	import hmac
05664a2f	6	import json
da8d2de2	7	import random
ac668111	8	import re
8b0d7a66 KM	9
8b0d7a66 KM	10	from .common import InfoExtractor
ac668111	11	from ..compat import compat_ord, compat_urllib_parse_unquote
8b0d7a66	12	from ..utils import (
8b0d7a66	13	ExtractorError,
577281b0 KM	14	float_or_none,
577281b0 KM	15	int_or_none,
38d70284	16	merge_dicts,
0c265486	17	multipart_encode,
577281b0	18	parse_duration,
34f00179	19	traverse_obj,
34f00179	20	try_call,
05664a2f	21	try_get,
ac668111	22	urljoin,
8b0d7a66 KM	23	)
	24
	25
	26	class CDAIE(InfoExtractor):
f1ced6df	27	_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
34f00179	28	_NETRC_MACHINE = 'cdapl'
34f00179	29
da8d2de2	30	_BASE_URL = 'https://www.cda.pl'
34f00179	31	_BASE_API_URL = 'https://api.cda.pl'
	32	_API_HEADERS = {
	33	'Accept': 'application/vnd.cda.public+json',
34f00179	34	}
	35	# hardcoded in the app
	36	_LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q'
	37	_BEARER_CACHE = 'cda-bearer'
	38
f1ced6df S	39	_TESTS = [{
	40	'url': 'http://www.cda.pl/video/5749950c',
	41	'md5': '6f844bf51b15f31fae165365707ae970',
	42	'info_dict': {
	43	'id': '5749950c',
	44	'ext': 'mp4',
	45	'height': 720,
	46	'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
577281b0	47	'description': 'md5:269ccd135d550da90d1662651fcb9772',
ec85ded8	48	'thumbnail': r're:^https?://.*\.jpg$',
577281b0	49	'average_rating': float,
0c265486 YCH	50	'duration': 39,
0c265486 YCH	51	'age_limit': 0,
05664a2f	52	'upload_date': '20160221',
05664a2f	53	'timestamp': 1456078244,
f1ced6df S	54	}
	55	}, {
	56	'url': 'http://www.cda.pl/video/57413289',
	57	'md5': 'a88828770a8310fc00be6c95faf7f4d5',
	58	'info_dict': {
	59	'id': '57413289',
	60	'ext': 'mp4',
	61	'title': 'Lądowanie na lotnisku na Maderze',
577281b0	62	'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
ec85ded8	63	'thumbnail': r're:^https?://.*\.jpg$',
577281b0	64	'uploader': 'crash404',
577281b0	65	'average_rating': float,
0c265486 YCH	66	'duration': 137,
0c265486 YCH	67	'age_limit': 0,
6d8a53d8 P	68	'upload_date': '20160220',
6d8a53d8 P	69	'timestamp': 1455968218,
8b0d7a66	70	}
0c265486	71	}, {
6d8a53d8 P	72	# Age-restricted with vfilm redirection
	73	'url': 'https://www.cda.pl/video/8753244c4',
	74	'md5': 'd8eeb83d63611289507010d3df3bb8b3',
0c265486	75	'info_dict': {
6d8a53d8	76	'id': '8753244c4',
0c265486	77	'ext': 'mp4',
6d8a53d8 P	78	'title': '[18+] Bez Filtra: Rezerwowe Psy czyli... najwulgarniejsza polska gra?',
6d8a53d8 P	79	'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
0c265486	80	'height': 1080,
6d8a53d8	81	'uploader': 'arhn eu',
0c265486	82	'thumbnail': r're:^https?://.*\.jpg$',
6d8a53d8	83	'duration': 991,
0c265486	84	'age_limit': 18,
0c265486	85	'average_rating': float,
6d8a53d8 P	86	'timestamp': 1633888264,
	87	'upload_date': '20211010',
	88	}
	89	}, {
	90	# Age-restricted without vfilm redirection
	91	'url': 'https://www.cda.pl/video/17028157b8',
	92	'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
	93	'info_dict': {
	94	'id': '17028157b8',
	95	'ext': 'mp4',
	96	'title': 'STENDUPY MICHAŁ OGIŃSKI',
	97	'description': 'md5:5851f3272bfc31f762d616040a1d609a',
	98	'height': 480,
	99	'uploader': 'oginski',
	100	'thumbnail': r're:^https?://.*\.jpg$',
	101	'duration': 18855,
	102	'age_limit': 18,
	103	'average_rating': float,
	104	'timestamp': 1699705901,
	105	'upload_date': '20231111',
	106	}
f1ced6df S	107	}, {
	108	'url': 'http://ebd.cda.pl/0x0/5749950c',
	109	'only_matching': True,
	110	}]
8b0d7a66	111
0c265486	112	def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
6d8a53d8	113	data, content_type = multipart_encode({'age_confirm': ''})
0c265486	114	return self._download_webpage(
6d8a53d8	115	url, video_id, *args,
0c265486 YCH	116	data=data, headers={
	117	'Referer': url,
	118	'Content-Type': content_type,
	119	}, **kwargs)
	120
34f00179	121	def _perform_login(self, username, password):
da8d2de2	122	app_version = random.choice((
	123	'1.2.88 build 15306',
	124	'1.2.174 build 18469',
	125	))
	126	android_version = random.randrange(8, 14)
	127	phone_model = random.choice((
	128	# x-kom.pl top selling Android smartphones, as of 2022-12-26
	129	# https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android
	130	'ASUS ZenFone 8',
	131	'Motorola edge 20 5G',
	132	'Motorola edge 30 neo 5G',
	133	'Motorola moto g22',
	134	'OnePlus Nord 2T 5G',
	135	'Samsung Galaxy A32 SM‑A325F',
	136	'Samsung Galaxy M13',
	137	'Samsung Galaxy S20 FE 5G',
	138	'Xiaomi 11T',
	139	'Xiaomi POCO M4 Pro',
	140	'Xiaomi Redmi 10',
	141	'Xiaomi Redmi 10C',
	142	'Xiaomi Redmi 9C NFC',
	143	'Xiaomi Redmi Note 10 Pro',
	144	'Xiaomi Redmi Note 11 Pro',
	145	'Xiaomi Redmi Note 11',
	146	'Xiaomi Redmi Note 11S 5G',
	147	'Xiaomi Redmi Note 11S',
	148	'realme 10',
	149	'realme 9 Pro+',
	150	'vivo Y33s',
	151	))
	152	self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'
	153
34f00179	154	cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
c305a25c	155	if cached_bearer.get('valid_until', 0) > dt.datetime.now().timestamp() + 5:
34f00179	156	self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}'
	157	return
	158
	159	password_hash = base64.urlsafe_b64encode(hmac.new(
	160	b's01m1Oer5IANoyBXQETzSOLWXgWs01m1Oer5bMg5xrTMMxRZ9Pi4fIPeFgIVRZ9PeXL8mPfXQETZGUAN5StRZ9P',
	161	''.join(f'{bytes((bt & 255, )).hex():0>2}'
	162	for bt in hashlib.md5(password.encode()).digest()).encode(),
	163	hashlib.sha256).digest()).decode().replace('=', '')
	164
	165	token_res = self._download_json(
	166	f'{self._BASE_API_URL}/oauth/token', None, 'Logging in', data=b'',
	167	headers={**self._API_HEADERS, 'Authorization': self._LOGIN_REQUEST_AUTH},
	168	query={
	169	'grant_type': 'password',
	170	'login': username,
	171	'password': password_hash,
	172	})
	173	self.cache.store(self._BEARER_CACHE, username, {
	174	'token': token_res['access_token'],
c305a25c	175	'valid_until': token_res['expires_in'] + dt.datetime.now().timestamp(),
34f00179	176	})
	177	self._API_HEADERS['Authorization'] = f'Bearer {token_res["access_token"]}'
	178
8b0d7a66 KM	179	def _real_extract(self, url):
8b0d7a66 KM	180	video_id = self._match_id(url)
34f00179	181
	182	if 'Authorization' in self._API_HEADERS:
	183	return self._api_extract(video_id)
	184	else:
6d8a53d8	185	return self._web_extract(video_id)
34f00179	186
	187	def _api_extract(self, video_id):
	188	meta = self._download_json(
	189	f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video']
	190
34f00179	191	uploader = traverse_obj(meta, 'author', 'login')
	192
	193	formats = [{
	194	'url': quality['file'],
	195	'format': quality.get('title'),
	196	'resolution': quality.get('name'),
	197	'height': try_call(lambda: int(quality['name'][:-1])),
	198	'filesize': quality.get('length'),
	199	} for quality in meta['qualities'] if quality.get('file')]
	200
da8d2de2	201	if meta.get('premium') and not meta.get('premium_free') and not formats:
	202	raise ExtractorError(
	203	'Video requires CDA Premium - subscription needed', expected=True)
	204
34f00179	205	return {
	206	'id': video_id,
	207	'title': meta.get('title'),
	208	'description': meta.get('description'),
	209	'uploader': None if uploader == 'anonim' else uploader,
	210	'average_rating': float_or_none(meta.get('rating')),
	211	'thumbnail': meta.get('thumb'),
	212	'formats': formats,
	213	'duration': meta.get('duration'),
	214	'age_limit': 18 if meta.get('for_adults') else 0,
	215	'view_count': meta.get('views'),
	216	}
	217
6d8a53d8	218	def _web_extract(self, video_id):
577281b0	219	self._set_cookie('cda.pl', 'cda.player', 'html5')
6d8a53d8	220	webpage, urlh = self._download_webpage_handle(
da8d2de2	221	f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)
8b0d7a66 KM	222
8b0d7a66 KM	223	if 'Ten film jest dostępny dla użytkowników premium' in webpage:
da8d2de2	224	self.raise_login_required('This video is only available for premium users')
8b0d7a66	225
cc2db878	226	if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage):
	227	self.raise_geo_restricted()
	228
0c265486	229	need_confirm_age = False
6d8a53d8	230	if self._html_search_regex(r'(<button[^>]+name="[^"]*age_confirm[^"]*")',
0c265486 YCH	231	webpage, 'birthday validate form', default=None):
0c265486 YCH	232	webpage = self._download_age_confirm_page(
6d8a53d8	233	urlh.url, video_id, note='Confirming age')
0c265486 YCH	234	need_confirm_age = True
0c265486 YCH	235
8b0d7a66 KM	236	formats = []
8b0d7a66 KM	237
577281b0 KM	238	uploader = self._search_regex(r'''(?x)
	239	<(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
	240	(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
	241	<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
	242	''', webpage, 'uploader', default=None, group='uploader')
577281b0	243	average_rating = self._search_regex(
38d70284	244	(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
	245	r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
	246	group='rating_value')
577281b0	247
f1ced6df S	248	info_dict = {
f1ced6df S	249	'id': video_id,
577281b0 KM	250	'title': self._og_search_title(webpage),
	251	'description': self._og_search_description(webpage),
	252	'uploader': uploader,
577281b0 KM	253	'average_rating': float_or_none(average_rating),
577281b0 KM	254	'thumbnail': self._og_search_thumbnail(webpage),
f1ced6df S	255	'formats': formats,
f1ced6df S	256	'duration': None,
0c265486	257	'age_limit': 18 if need_confirm_age else 0,
f1ced6df	258	}
8b0d7a66	259
41d1cca3	260	info = self._search_json_ld(webpage, video_id, default={})
41d1cca3	261
38d70284	262	# Source: https://www.cda.pl/js/player.js?t=1606154898
	263	def decrypt_file(a):
	264	for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
	265	a = a.replace(p, '')
	266	a = compat_urllib_parse_unquote(a)
	267	b = []
	268	for c in a:
	269	f = compat_ord(c)
ac668111	270	b.append(chr(33 + (f + 14) % 94) if 33 <= f <= 126 else chr(f))
38d70284	271	a = ''.join(b)
	272	a = a.replace('.cda.mp4', '')
	273	for p in ('.2cda.pl', '.3cda.pl'):
	274	a = a.replace(p, '.cda.pl')
	275	if '/upstream' in a:
	276	a = a.replace('/upstream', '.mp4/upstream')
	277	return 'https://' + a
	278	return 'https://' + a + '.mp4'
	279
f1ced6df	280	def extract_format(page, version):
f8f18f33	281	json_str = self._html_search_regex(
577281b0 KM	282	r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
	283	'%s player_json' % version, fatal=False, group='player_data')
	284	if not json_str:
	285	return
	286	player_data = self._parse_json(
	287	json_str, '%s player_data' % version, fatal=False)
	288	if not player_data:
	289	return
	290	video = player_data.get('video')
	291	if not video or 'file' not in video:
	292	self.report_warning('Unable to extract %s version information' % version)
f1ced6df	293	return
fdeea726 AS	294	if video['file'].startswith('uggc'):
	295	video['file'] = codecs.decode(video['file'], 'rot_13')
	296	if video['file'].endswith('adc.mp4'):
	297	video['file'] = video['file'].replace('adc.mp4', '.mp4')
38d70284	298	elif not video['file'].startswith('http'):
38d70284	299	video['file'] = decrypt_file(video['file'])
05664a2f	300	video_quality = video.get('quality')
	301	qualities = video.get('qualities', {})
	302	video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality)
	303	info_dict['formats'].append({
577281b0	304	'url': video['file'],
05664a2f	305	'format_id': video_quality,
	306	'height': int_or_none(video_quality[:-1]),
	307	})
	308	for quality, cda_quality in qualities.items():
	309	if quality == video_quality:
	310	continue
	311	data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
	312	'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]}
	313	data = json.dumps(data).encode('utf-8')
	314	video_url = self._download_json(
	315	f'https://www.cda.pl/video/{video_id}', video_id, headers={
	316	'Content-Type': 'application/json',
	317	'X-Requested-With': 'XMLHttpRequest'
	318	}, data=data, note=f'Fetching {quality} url',
	319	errnote=f'Failed to fetch {quality} url', fatal=False)
	320	if try_get(video_url, lambda x: x['result']['status']) == 'ok':
	321	video_url = try_get(video_url, lambda x: x['result']['resp'])
	322	info_dict['formats'].append({
	323	'url': video_url,
	324	'format_id': quality,
	325	'height': int_or_none(quality[:-1])
	326	})
	327
f1ced6df	328	if not info_dict['duration']:
577281b0	329	info_dict['duration'] = parse_duration(video.get('duration'))
f1ced6df S	330
	331	extract_format(webpage, 'default')
	332
	333	for href, resolution in re.findall(
	334	r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
	335	webpage):
0c265486 YCH	336	if need_confirm_age:
	337	handler = self._download_age_confirm_page
	338	else:
	339	handler = self._download_webpage
	340
	341	webpage = handler(
41d1cca3	342	urljoin(self._BASE_URL, href), video_id,
577281b0	343	'Downloading %s version information' % resolution, fatal=False)
8b0d7a66	344	if not webpage:
f1ced6df S	345	# Manually report warning because empty page is returned when
	346	# invalid version is requested.
	347	self.report_warning('Unable to download %s version information' % resolution)
8b0d7a66	348	continue
0c265486	349
f1ced6df	350	extract_format(webpage, resolution)
8b0d7a66	351
38d70284	352	return merge_dicts(info_dict, info)