]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/cda.py
[compat] Remove more functions
[yt-dlp.git] / yt_dlp / extractor / cda.py
CommitLineData
fdeea726 1import codecs
05664a2f 2import json
ac668111 3import re
8b0d7a66
KM
4
5from .common import InfoExtractor
ac668111 6from ..compat import compat_ord, compat_urllib_parse_unquote
8b0d7a66 7from ..utils import (
8b0d7a66 8 ExtractorError,
577281b0
KM
9 float_or_none,
10 int_or_none,
38d70284 11 merge_dicts,
0c265486 12 multipart_encode,
577281b0 13 parse_duration,
0c265486 14 random_birthday,
05664a2f 15 try_get,
ac668111 16 urljoin,
8b0d7a66
KM
17)
18
19
class CDAIE(InfoExtractor):
    """Extractor for cda.pl video pages and ebd.cda.pl embed URLs."""

    _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
    _BASE_URL = 'http://www.cda.pl/'
    _TESTS = [{
        'url': 'http://www.cda.pl/video/5749950c',
        'md5': '6f844bf51b15f31fae165365707ae970',
        'info_dict': {
            'id': '5749950c',
            'ext': 'mp4',
            'height': 720,
            'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
            'description': 'md5:269ccd135d550da90d1662651fcb9772',
            'thumbnail': r're:^https?://.*\.jpg$',
            'average_rating': float,
            'duration': 39,
            'age_limit': 0,
            'upload_date': '20160221',
            'timestamp': 1456078244,
        }
    }, {
        'url': 'http://www.cda.pl/video/57413289',
        'md5': 'a88828770a8310fc00be6c95faf7f4d5',
        'info_dict': {
            'id': '57413289',
            'ext': 'mp4',
            'title': 'Lądowanie na lotnisku na Maderze',
            'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'crash404',
            'view_count': int,
            'average_rating': float,
            'duration': 137,
            'age_limit': 0,
        }
    }, {
        # Age-restricted
        'url': 'http://www.cda.pl/video/1273454c4',
        'info_dict': {
            'id': '1273454c4',
            'ext': 'mp4',
            'title': 'Bronson (2008) napisy HD 1080p',
            'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
            'height': 1080,
            'uploader': 'boniek61',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 5554,
            'age_limit': 18,
            'view_count': int,
            'average_rating': float,
        },
    }, {
        'url': 'http://ebd.cda.pl/0x0/5749950c',
        'only_matching': True,
    }]

    def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
        """Submit the birthday form for ``url`` and return the resulting page.

        A random birthday is posted as multipart form data to
        ``/a/validatebirth`` (resolved against ``url``).  Extra positional and
        keyword arguments are forwarded to ``_download_webpage``.
        """
        # 'rok', 'miesiac', 'dzien' are the form's year/month/day field names.
        form_data = random_birthday('rok', 'miesiac', 'dzien')
        form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
        data, content_type = multipart_encode(form_data)
        return self._download_webpage(
            urljoin(url, '/a/validatebirth'), video_id, *args,
            data=data, headers={
                'Referer': url,
                'Content-Type': content_type,
            }, **kwargs)

    # Source: https://www.cda.pl/js/player.js?t=1606154898
    @staticmethod
    def _decrypt_file(a):
        """Decode an obfuscated ``file`` value from player_data into a URL.

        Marker tokens are stripped, the string is URL-unquoted, printable
        ASCII (33..126) is rotated by 47 positions (ROT47), and the host and
        extension are normalised before ``https://`` is prepended.
        """
        for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
            a = a.replace(p, '')
        a = compat_urllib_parse_unquote(a)
        # 33 + (code + 14) % 94 == 33 + (code - 33 + 47) % 94, i.e. ROT47.
        a = ''.join(
            chr(33 + (code + 14) % 94) if 33 <= code <= 126 else chr(code)
            for code in map(compat_ord, a))
        a = a.replace('.cda.mp4', '')
        for p in ('.2cda.pl', '.3cda.pl'):
            a = a.replace(p, '.cda.pl')
        if '/upstream' in a:
            a = a.replace('/upstream', '.mp4/upstream')
            return 'https://' + a
        return 'https://' + a + '.mp4'

    def _real_extract(self, url):
        """Extract metadata and formats for the video addressed by ``url``.

        Returns the info dict built from the page (title, description,
        uploader, counts, thumbnail, formats, duration, age limit) merged
        with whatever JSON-LD metadata the page provides.

        Raises ExtractorError for premium-only videos and a geo-restriction
        error when the page reports the video is unavailable in the
        viewer's country.
        """
        video_id = self._match_id(url)
        self._set_cookie('cda.pl', 'cda.player', 'html5')
        # _BASE_URL already ends with '/', so join instead of concatenating
        # to avoid requesting 'http://www.cda.pl//video/<id>'.
        webpage = self._download_webpage(
            urljoin(self._BASE_URL, f'/video/{video_id}'), video_id)

        if 'Ten film jest dostępny dla użytkowników premium' in webpage:
            raise ExtractorError('This video is only available for premium users.', expected=True)

        if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage):
            self.raise_geo_restricted()

        # A birthday form on the page means the video is age-gated; confirm
        # and continue with the page returned after confirmation.
        need_confirm_age = False
        if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
                                   webpage, 'birthday validate form', default=None):
            webpage = self._download_age_confirm_page(
                url, video_id, note='Confirming age')
            need_confirm_age = True

        formats = []

        uploader = self._search_regex(r'''(?x)
            <(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
            (?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
            <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
        ''', webpage, 'uploader', default=None, group='uploader')
        view_count = self._search_regex(
            r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
            'view_count', default=None)
        average_rating = self._search_regex(
            (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
             r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
            group='rating_value')

        info_dict = {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'uploader': uploader,
            'view_count': int_or_none(view_count),
            'average_rating': float_or_none(average_rating),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
            'duration': None,
            'age_limit': 18 if need_confirm_age else 0,
        }

        info = self._search_json_ld(webpage, video_id, default={})

        def quality_height(label):
            # Labels look like '720p'; anything that is not a string
            # (e.g. a missing quality) has no derivable height.
            return int_or_none(label[:-1]) if isinstance(label, str) else None

        def extract_format(page, version):
            """Append the formats described by ``page``'s player_data."""
            json_str = self._html_search_regex(
                r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
                f'{version} player_json', fatal=False, group='player_data')
            if not json_str:
                return
            player_data = self._parse_json(
                json_str, f'{version} player_data', fatal=False)
            if not player_data:
                return
            video = player_data.get('video')
            if not video or 'file' not in video:
                self.report_warning(f'Unable to extract {version} version information')
                return
            if video['file'].startswith('uggc'):
                # 'uggc' is 'http' under ROT13.
                video['file'] = codecs.decode(video['file'], 'rot_13')
                if video['file'].endswith('adc.mp4'):
                    video['file'] = video['file'].replace('adc.mp4', '.mp4')
            elif not video['file'].startswith('http'):
                video['file'] = self._decrypt_file(video['file'])

            video_quality = video.get('quality')
            qualities = video.get('qualities') or {}
            # Map the site's internal quality code back to its label
            # (the key of `qualities`) when one matches.
            video_quality = next(
                (k for k, v in qualities.items() if v == video_quality), video_quality)
            formats.append({
                'url': video['file'],
                'format_id': video_quality,
                'height': quality_height(video_quality),
            })

            # Ask the site's JSON-RPC endpoint for every other quality.
            for quality, cda_quality in qualities.items():
                if quality == video_quality:
                    continue
                data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
                        'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]}
                data = json.dumps(data).encode('utf-8')
                response = self._download_json(
                    f'https://www.cda.pl/video/{video_id}', video_id, headers={
                        'Content-Type': 'application/json',
                        'X-Requested-With': 'XMLHttpRequest'
                    }, data=data, note=f'Fetching {quality} url',
                    errnote=f'Failed to fetch {quality} url', fatal=False)
                if try_get(response, lambda x: x['result']['status']) != 'ok':
                    continue
                video_url = try_get(response, lambda x: x['result']['resp'])
                if not video_url:
                    # An 'ok' status without a link would yield an unusable format.
                    continue
                formats.append({
                    'url': video_url,
                    'format_id': quality,
                    'height': quality_height(quality),
                })

            if not info_dict['duration']:
                info_dict['duration'] = parse_duration(video.get('duration'))

        extract_format(webpage, 'default')

        # Age-gated videos need the birthday form re-submitted for every page.
        if need_confirm_age:
            handler = self._download_age_confirm_page
        else:
            handler = self._download_webpage

        for href, resolution in re.findall(
                r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
                webpage):
            version_page = handler(
                urljoin(self._BASE_URL, href), video_id,
                f'Downloading {resolution} version information', fatal=False)
            if not version_page:
                # Manually report warning because empty page is returned when
                # invalid version is requested.
                self.report_warning(f'Unable to download {resolution} version information')
                continue

            extract_format(version_page, resolution)

        self._sort_formats(formats)

        return merge_dicts(info_dict, info)