]>
Commit | Line | Data |
---|---|---|
1 | import codecs | |
2 | import json | |
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..compat import compat_ord, compat_urllib_parse_unquote | |
7 | from ..utils import ( | |
8 | ExtractorError, | |
9 | float_or_none, | |
10 | int_or_none, | |
11 | merge_dicts, | |
12 | multipart_encode, | |
13 | parse_duration, | |
14 | random_birthday, | |
15 | try_get, | |
16 | urljoin, | |
17 | ) | |
18 | ||
19 | ||
class CDAIE(InfoExtractor):
    """Extractor for videos hosted on cda.pl (regular and ``ebd.cda.pl`` embed URLs)."""

    _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
    _BASE_URL = 'http://www.cda.pl/'
    _TESTS = [{
        'url': 'http://www.cda.pl/video/5749950c',
        'md5': '6f844bf51b15f31fae165365707ae970',
        'info_dict': {
            'id': '5749950c',
            'ext': 'mp4',
            'height': 720,
            'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
            'description': 'md5:269ccd135d550da90d1662651fcb9772',
            'thumbnail': r're:^https?://.*\.jpg$',
            'average_rating': float,
            'duration': 39,
            'age_limit': 0,
            'upload_date': '20160221',
            'timestamp': 1456078244,
        }
    }, {
        'url': 'http://www.cda.pl/video/57413289',
        'md5': 'a88828770a8310fc00be6c95faf7f4d5',
        'info_dict': {
            'id': '57413289',
            'ext': 'mp4',
            'title': 'Lądowanie na lotnisku na Maderze',
            'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'crash404',
            'view_count': int,
            'average_rating': float,
            'duration': 137,
            'age_limit': 0,
        }
    }, {
        # Age-restricted
        'url': 'http://www.cda.pl/video/1273454c4',
        'info_dict': {
            'id': '1273454c4',
            'ext': 'mp4',
            'title': 'Bronson (2008) napisy HD 1080p',
            'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
            'height': 1080,
            'uploader': 'boniek61',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 5554,
            'age_limit': 18,
            'view_count': int,
            'average_rating': float,
        },
    }, {
        'url': 'http://ebd.cda.pl/0x0/5749950c',
        'only_matching': True,
    }]

    @staticmethod
    def _height_from_label(label):
        """Turn a quality label such as ``'720p'`` into ``720``; None if not parseable."""
        if not isinstance(label, str):
            return None
        return int_or_none(label[:-1])

    def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
        """POST a random birthday to ``/a/validatebirth`` and return the resulting page.

        Extra positional and keyword arguments are forwarded unchanged to
        ``_download_webpage`` (e.g. ``note``, ``fatal``).
        """
        form_data = random_birthday('rok', 'miesiac', 'dzien')
        form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
        data, content_type = multipart_encode(form_data)
        return self._download_webpage(
            urljoin(url, '/a/validatebirth'), video_id, *args,
            data=data, headers={
                'Referer': url,
                'Content-Type': content_type,
            }, **kwargs)

    def _real_extract(self, url):
        """Build the info dict for the video at *url*.

        Raises ExtractorError (expected) for premium-only videos and reports
        geo restriction when the page carries the corresponding notice.
        """
        video_id = self._match_id(url)
        self._set_cookie('cda.pl', 'cda.player', 'html5')
        # _BASE_URL already ends with '/', so join instead of concatenating
        # to avoid requesting 'http://www.cda.pl//video/<id>'.
        webpage = self._download_webpage(
            urljoin(self._BASE_URL, '/video/' + video_id), video_id)

        if 'Ten film jest dostępny dla użytkowników premium' in webpage:
            raise ExtractorError('This video is only available for premium users.', expected=True)

        # The raw HTML may carry the non-breaking space as the `&nbsp;` entity;
        # `\s` covers literal whitespace.
        if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage):
            self.raise_geo_restricted()

        need_confirm_age = False
        if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
                                   webpage, 'birthday validate form', default=None):
            webpage = self._download_age_confirm_page(
                url, video_id, note='Confirming age')
            need_confirm_age = True

        formats = []

        uploader = self._search_regex(r'''(?x)
            <(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
            (?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
            <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
        ''', webpage, 'uploader', default=None, group='uploader')
        view_count = self._search_regex(
            r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
            'view_count', default=None)
        average_rating = self._search_regex(
            (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
             r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
            group='rating_value')

        info_dict = {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'uploader': uploader,
            'view_count': int_or_none(view_count),
            'average_rating': float_or_none(average_rating),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
            'duration': None,
            'age_limit': 18 if need_confirm_age else 0,
        }

        info = self._search_json_ld(webpage, video_id, default={})

        # Source: https://www.cda.pl/js/player.js?t=1606154898
        def decrypt_file(a):
            """Decode an obfuscated ``file`` value into an https media URL."""
            # Strip the decoy markers inserted into the encoded string.
            for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
                a = a.replace(p, '')
            a = compat_urllib_parse_unquote(a)
            # ROT47 over the printable ASCII range 33..126; other chars pass through.
            b = []
            for c in a:
                f = compat_ord(c)
                b.append(chr(33 + (f + 14) % 94) if 33 <= f <= 126 else chr(f))
            a = ''.join(b)
            a = a.replace('.cda.mp4', '')
            for p in ('.2cda.pl', '.3cda.pl'):
                a = a.replace(p, '.cda.pl')
            if '/upstream' in a:
                a = a.replace('/upstream', '.mp4/upstream')
                return 'https://' + a
            return 'https://' + a + '.mp4'

        def extract_format(page, version):
            """Parse ``player_data`` out of *page* and append its formats to info_dict."""
            json_str = self._html_search_regex(
                r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
                '%s player_json' % version, fatal=False, group='player_data')
            if not json_str:
                return
            player_data = self._parse_json(
                json_str, '%s player_data' % version, fatal=False)
            if not player_data:
                return
            video = player_data.get('video')
            # A missing, null or empty 'file' is equally unusable.
            if not video or not video.get('file'):
                self.report_warning('Unable to extract %s version information' % version)
                return
            if video['file'].startswith('uggc'):
                # 'uggc' is 'http' under ROT13.
                video['file'] = codecs.decode(video['file'], 'rot_13')
                if video['file'].endswith('adc.mp4'):
                    video['file'] = video['file'].replace('adc.mp4', '.mp4')
            elif not video['file'].startswith('http'):
                video['file'] = decrypt_file(video['file'])
            video_quality = video.get('quality')
            # Tolerate an explicit null as well as a missing key.
            qualities = video.get('qualities') or {}
            # Map the site-internal quality code back to its label (e.g. '720p').
            video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality)
            info_dict['formats'].append({
                'url': video['file'],
                'format_id': video_quality,
                'height': self._height_from_label(video_quality),
            })
            for quality, cda_quality in qualities.items():
                if quality == video_quality:
                    continue
                data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
                        'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]}
                data = json.dumps(data).encode('utf-8')
                response = self._download_json(
                    f'https://www.cda.pl/video/{video_id}', video_id, headers={
                        'Content-Type': 'application/json',
                        'X-Requested-With': 'XMLHttpRequest'
                    }, data=data, note=f'Fetching {quality} url',
                    errnote=f'Failed to fetch {quality} url', fatal=False)
                if try_get(response, lambda x: x['result']['status']) != 'ok':
                    continue
                video_url = try_get(response, lambda x: x['result']['resp'])
                if not video_url:
                    # Never emit a format without a URL.
                    continue
                info_dict['formats'].append({
                    'url': video_url,
                    'format_id': quality,
                    'height': self._height_from_label(quality),
                })

            if not info_dict['duration']:
                info_dict['duration'] = parse_duration(video.get('duration'))

        extract_format(webpage, 'default')

        # Age-gated videos need the birthday form re-submitted for every page.
        if need_confirm_age:
            handler = self._download_age_confirm_page
        else:
            handler = self._download_webpage

        for href, resolution in re.findall(
                r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
                webpage):
            version_page = handler(
                urljoin(self._BASE_URL, href), video_id,
                'Downloading %s version information' % resolution, fatal=False)
            if not version_page:
                # Manually report warning because empty page is returned when
                # invalid version is requested.
                self.report_warning('Unable to download %s version information' % resolution)
                continue

            extract_format(version_page, resolution)

        self._sort_formats(formats)

        return merge_dicts(info_dict, info)