jfr.im git - yt-dlp.git/blob - youtube_dlc/extractor/viki.py
Updated to release 2020.11.21.1
[yt-dlp.git] / youtube_dlc / extractor / viki.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import base64
5 import hashlib
6 import hmac
7 import itertools
8 import json
9 import re
10 import time
11
12 from .common import InfoExtractor
13 from ..compat import (
14 compat_parse_qs,
15 compat_urllib_parse_urlparse,
16 )
17 from ..utils import (
18 ExtractorError,
19 int_or_none,
20 HEADRequest,
21 parse_age_limit,
22 parse_iso8601,
23 sanitized_Request,
24 )
25
26
class VikiBaseIE(InfoExtractor):
    """Common base for the Viki extractors.

    Provides signed API URL construction, API calls with a single retry on
    an 'invalid timestamp' error, login, and translation of a resource's
    'blocking' flags into extractor errors.
    """

    _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
    # Filled with (path already ending in '?' or '&', app id, timestamp).
    _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
    # Filled with (query string, hex HMAC-SHA1 signature of that query).
    _API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'

    _APP = '100005a'
    # Sent as the 'x-viki-app-ver' header on signed API requests.
    _APP_VERSION = '2.2.5.1428709186'
    # Key used for the HMAC-SHA1 signature of the query string.
    _APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'

    _GEO_BYPASS = False
    _NETRC_MACHINE = 'viki'

    # Session token stored by _login(); appended to signed queries when set.
    _token = None

    # Messages for the keys of a resource's 'blocking' dict that are reported.
    _ERRORS = {
        'geo': 'Sorry, this content is not available in your region.',
        'upcoming': 'Sorry, this content is not yet available.',
        # 'paywall': 'paywall',
    }

    def _prepare_call(self, path, timestamp=None, post_data=None):
        """Build the signed API URL (or POST request) for ``path``.

        ``timestamp`` defaults to the current time when falsy.  Returns the
        URL string when ``post_data`` is falsy; otherwise returns a request
        object whose body is ``post_data`` serialized as UTF-8 JSON.
        """
        path += '?' if '?' not in path else '&'
        if not timestamp:
            timestamp = int(time.time())
        query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
        if self._token:
            query += '&token=%s' % self._token
        # The signature covers the complete query, including the token.
        sig = hmac.new(
            self._APP_SECRET.encode('ascii'),
            query.encode('ascii'),
            hashlib.sha1
        ).hexdigest()
        url = self._API_URL_TEMPLATE % (query, sig)
        if not post_data:
            return url
        return sanitized_Request(url, json.dumps(post_data).encode('utf-8'))

    def _api_headers(self):
        """Return the headers sent with every signed API request."""
        return {'x-viki-app-ver': self._APP_VERSION}

    def _download_api_json(self, path, video_id, note, timestamp, post_data):
        """Perform one signed API request and return the decoded JSON.

        400 and 404 responses are accepted so that the caller can inspect
        the 'error' field of the returned body.
        """
        return self._download_json(
            self._prepare_call(path, timestamp, post_data), video_id, note,
            headers=self._api_headers(), expected_status=[200, 400, 404])

    def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
        """Call the API and return its JSON response.

        If the response reports 'invalid timestamp', the call is repeated
        once using the 'current_timestamp' value from that response.  Any
        error still present afterwards is raised via ``_raise_error``.
        """
        resp = self._download_api_json(path, video_id, note, timestamp, post_data)

        if resp.get('error') == 'invalid timestamp':
            resp = self._download_api_json(
                path, video_id, '%s (retry)' % note,
                int(resp['current_timestamp']), post_data)

        error = resp.get('error')
        if error:
            self._raise_error(error)

        return resp

    def _raise_error(self, error):
        """Raise an expected ExtractorError carrying the API error text."""
        raise ExtractorError(
            '%s returned error: %s' % (self.IE_NAME, error),
            expected=True)

    def _check_errors(self, data):
        """Raise if ``data['blocking']`` has a truthy flag listed in _ERRORS.

        The 'geo' flag is reported through ``raise_geo_restricted``; every
        other known flag raises an expected ExtractorError.
        """
        # 'blocking' may be absent or null; treat both as "nothing blocked".
        for reason, status in (data.get('blocking') or {}).items():
            if status and reason in self._ERRORS:
                message = self._ERRORS[reason]
                if reason == 'geo':
                    self.raise_geo_restricted(msg=message)
                raise ExtractorError('%s said: %s' % (
                    self.IE_NAME, message), expected=True)

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Create an API session and remember its token.

        Does nothing when ``_get_login_info()`` yields no username.  Emits
        a warning when the session response carries no token.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        login_form = {
            'login_id': username,
            'password': password,
        }

        login = self._call_api(
            'sessions.json', None,
            'Logging in', post_data=login_form)

        self._token = login.get('token')
        if not self._token:
            self.report_warning('Unable to get session token, login has probably failed')

    @staticmethod
    def dict_selection(dict_obj, preferred_key, allow_fallback=True):
        """Return ``dict_obj[preferred_key]`` or a fallback value.

        When ``preferred_key`` is present its value is returned as is, even
        if it is falsy.  Otherwise, if ``allow_fallback`` is true, the first
        truthy value in iteration order is returned; None when there is no
        such value or fallback is disabled.
        """
        if preferred_key in dict_obj:
            return dict_obj.get(preferred_key)

        if not allow_fallback:
            return None

        return next((value for value in dict_obj.values() if value), None)
124
125
class VikiIE(VikiBaseIE):
    """Extractor for single Viki videos (``/videos/<id>`` and ``/player/<id>``)."""

    IE_NAME = 'viki'
    _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
        'info_dict': {
            'id': '1023585v',
            'ext': 'mp4',
            'title': 'Heirs Episode 14',
            'uploader': 'SBS',
            'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            'upload_date': '20131121',
            'age_limit': 13,
        },
        'skip': 'Blocked in the US',
    }, {
        # clip
        'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
        'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
        'info_dict': {
            'id': '1067139v',
            'ext': 'mp4',
            'title': "'The Avengers: Age of Ultron' Press Conference",
            'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
            'duration': 352,
            'timestamp': 1430380829,
            'upload_date': '20150430',
            'uploader': 'Arirang TV',
            'like_count': int,
            'age_limit': 0,
        }
    }, {
        'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
        'info_dict': {
            'id': '1048879v',
            'ext': 'mp4',
            'title': 'Ankhon Dekhi',
            'duration': 6512,
            'timestamp': 1408532356,
            'upload_date': '20140820',
            'uploader': 'Spuul',
            'like_count': int,
            'age_limit': 13,
        },
        'skip': 'Blocked in the US',
    }, {
        # episode
        'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
        'md5': '94e0e34fd58f169f40c184f232356cfe',
        'info_dict': {
            'id': '44699v',
            'ext': 'mp4',
            'title': 'Boys Over Flowers - Episode 1',
            'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
            'duration': 4172,
            'timestamp': 1270496524,
            'upload_date': '20100405',
            'uploader': 'group8',
            'like_count': int,
            'age_limit': 13,
        },
        'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
    }, {
        # youtube external
        'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
        'md5': '63f8600c1da6f01b7640eee7eca4f1da',
        'info_dict': {
            'id': '50562v',
            'ext': 'webm',
            'title': 'Poor Nastya [COMPLETE] - Episode 1',
            'description': '',
            'duration': 606,
            'timestamp': 1274949505,
            'upload_date': '20101213',
            'uploader': 'ad14065n',
            'uploader_id': 'ad14065n',
            'like_count': int,
            'age_limit': 13,
        },
        'skip': 'Page not found!',
    }, {
        'url': 'http://www.viki.com/player/44699v',
        'only_matching': True,
    }, {
        # non-English description
        'url': 'http://www.viki.com/videos/158036v-love-in-magic',
        'md5': 'adf9e321a0ae5d0aace349efaaff7691',
        'info_dict': {
            'id': '158036v',
            'ext': 'mp4',
            'uploader': 'I Planet Entertainment',
            'upload_date': '20111122',
            'timestamp': 1321985454,
            'description': 'md5:44b1e46619df3a072294645c770cef36',
            'title': 'Love In Magic',
            'age_limit': 13,
        },
    }]

    def _extract_title(self, video, video_id):
        """Return the English title, or a composed fallback title.

        The fallback is 'Episode <n>' for episodes with a usable number,
        otherwise the video id; it is prefixed with the container title
        when one is available.
        """
        title = self.dict_selection(
            video.get('titles') or {}, 'en', allow_fallback=False)
        if title:
            return title

        # A missing or non-numeric number must not break '%d' formatting.
        episode_number = int_or_none(video.get('number'))
        if video.get('type') == 'episode' and episode_number is not None:
            title = 'Episode %d' % episode_number
        else:
            title = video.get('id') or video_id

        container_titles = (video.get('container') or {}).get('titles') or {}
        container_title = self.dict_selection(container_titles, 'en')
        # Without a container title, avoid producing 'None - <title>'.
        if container_title:
            title = '%s - %s' % (container_title, title)
        return title

    def _extract_subtitles(self, video, video_id):
        """Return the subtitles dict for ``video``.

        Uses the 'streamSubtitles' -> 'dash' list of a second video JSON
        download when it is present; otherwise builds signed srt/vtt URLs
        for each language listed in ``video['subtitle_completions']``.
        """
        subtitles = {}

        # New way to fetch subtitles
        new_video = self._download_json(
            'https://www.viki.com/api/videos/%s' % video_id, video_id,
            'Downloading new video JSON to get subtitles',
            headers={'x-viki-app-ver': self._APP_VERSION},
            expected_status=[200, 400, 404])

        stream_subtitles = (
            new_video.get('streamSubtitles')
            if isinstance(new_video, dict) else None)
        dash_subtitles = (
            stream_subtitles.get('dash')
            if isinstance(stream_subtitles, dict) else None)

        if isinstance(dash_subtitles, list):
            for sub in dash_subtitles:
                if not isinstance(sub, dict):
                    continue
                sub_lang, sub_url = sub.get('srclang'), sub.get('src')
                # An entry without language or URL cannot be downloaded.
                if not sub_lang or not sub_url:
                    continue
                subtitles[sub_lang] = [{
                    'ext': 'vtt',
                    'url': sub_url,
                    'completion': sub.get('percentage'),
                }]
            return subtitles

        # fall-back to the old way if there isn't a usable streamSubtitles list
        for subtitle_lang in (video.get('subtitle_completions') or {}):
            subtitles[subtitle_lang] = [{
                'ext': subtitles_format,
                'url': self._prepare_call(
                    'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
            } for subtitles_format in ('srt', 'vtt')]
        return subtitles

    def _real_extract(self, url):
        """Extract metadata, subtitles and formats for one Viki video.

        Formats come from the 'streams' of the video JSON; if that yields
        none, the signed streams API is queried.  When that response has an
        'external' entry, a url_transparent result pointing at it is
        returned instead of formats.
        """
        video_id = self._match_id(url)

        resp = self._download_json(
            'https://www.viki.com/api/videos/' + video_id,
            video_id, 'Downloading video JSON',
            headers={'x-viki-app-ver': '4.0.57'})
        video = resp['video']

        self._check_errors(video)

        thumbnails = [{
            'id': thumbnail_id,
            'url': thumbnail.get('url'),
        } for thumbnail_id, thumbnail in (video.get('images') or {}).items()
            if thumbnail.get('url')]

        result = {
            'id': video_id,
            'title': self._extract_title(video, video_id),
            'description': self.dict_selection(video.get('descriptions') or {}, 'en'),
            'duration': int_or_none(video.get('duration')),
            'timestamp': parse_iso8601(video.get('created_at')),
            'uploader': video.get('author'),
            'like_count': int_or_none((video.get('likes') or {}).get('count')),
            'age_limit': parse_age_limit(video.get('rating')),
            'thumbnails': thumbnails,
            'subtitles': self._extract_subtitles(video, video_id),
        }

        formats = []

        def add_format(format_id, format_dict, protocol='http'):
            # rtmps URLs do not seem to work
            if protocol == 'rtmps':
                return
            format_url = format_dict.get('url')
            if not format_url:
                return
            format_drms = format_dict.get('drms')
            format_stream_id = format_dict.get('id')
            # A 'stream' query parameter holds the real URL, base64-encoded.
            qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
            stream = qs.get('stream', [None])[0]
            if stream:
                format_url = base64.b64decode(stream).decode()
            if format_id in ('m3u8', 'hls'):
                m3u8_formats = self._extract_m3u8_formats(
                    format_url, video_id, 'mp4',
                    entry_protocol='m3u8_native',
                    m3u8_id='m3u8-%s' % protocol, fatal=False)
                # Despite CODECS metadata in m3u8 all video-only formats
                # are actually video+audio
                for f in m3u8_formats:
                    if '_drm/index_' in f['url']:
                        continue
                    if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
                        f['acodec'] = None
                    formats.append(f)
            elif format_id in ('mpd', 'dash'):
                formats.extend(self._extract_mpd_formats(
                    format_url, video_id, 'mpd-%s' % protocol, fatal=False))
            elif format_url.startswith('rtmp'):
                mobj = re.search(
                    r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
                    format_url)
                if not mobj:
                    return
                formats.append({
                    'format_id': 'rtmp-%s' % format_id,
                    'ext': 'flv',
                    'url': mobj.group('url'),
                    'play_path': mobj.group('playpath'),
                    'app': mobj.group('app'),
                    'page_url': url,
                    'drms': format_drms,
                    'stream_id': format_stream_id,
                })
            else:
                # The HEAD request is non-fatal, so it may yield a falsy
                # value instead of a response; the size is then unknown.
                urlh = self._request_webpage(
                    HEADRequest(format_url), video_id, 'Checking file size', fatal=False)
                filesize = (
                    int_or_none(urlh.headers.get('Content-Length'))
                    if urlh else None)
                formats.append({
                    'url': format_url,
                    'format_id': '%s-%s' % (format_id, protocol),
                    'height': int_or_none(self._search_regex(
                        r'^(\d+)[pP]$', format_id, 'height', default=None)),
                    'drms': format_drms,
                    'stream_id': format_stream_id,
                    'filesize': filesize,
                })

        for format_id, format_dict in (resp.get('streams') or {}).items():
            add_format(format_id, format_dict)
        if not formats:
            streams = self._call_api(
                'videos/%s/streams.json' % video_id, video_id,
                'Downloading video streams JSON')

            if 'external' in streams:
                result.update({
                    '_type': 'url_transparent',
                    'url': streams['external']['url'],
                })
                return result

            for format_id, stream_dict in streams.items():
                for protocol, format_dict in stream_dict.items():
                    add_format(format_id, format_dict, protocol)
        self._sort_formats(formats)

        result['formats'] = formats
        return result
373
374
class VikiChannelIE(VikiBaseIE):
    """Extractor turning a Viki channel (container) page into a playlist."""

    IE_NAME = 'viki:channel'
    _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
        'info_dict': {
            'id': '50c',
            'title': 'Boys Over Flowers',
            'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
        },
        'playlist_mincount': 71,
    }, {
        'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
        'info_dict': {
            'id': '1354c',
            'title': 'Poor Nastya [COMPLETE]',
            'description': 'md5:05bf5471385aa8b21c18ad450e350525',
        },
        'playlist_count': 127,
    }, {
        'url': 'http://www.viki.com/news/24569c-showbiz-korea',
        'only_matching': True,
    }, {
        'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
        'only_matching': True,
    }, {
        'url': 'http://www.viki.com/artists/2141c-shinee',
        'only_matching': True,
    }]

    # Page size requested from the paginated container listings.
    _PER_PAGE = 25

    def _real_extract(self, url):
        """Return a playlist of every episode, clip and movie of the channel.

        Each listing is walked page by page until the response's
        'pagination' -> 'next' value is falsy.
        """
        channel_id = self._match_id(url)

        channel = self._call_api(
            'containers/%s.json' % channel_id, channel_id,
            'Downloading channel JSON')
        self._check_errors(channel)

        playlist_title = self.dict_selection(channel['titles'], 'en')
        playlist_description = self.dict_selection(channel['descriptions'], 'en')

        entries = []
        for video_type in ('episodes', 'clips', 'movies'):
            page_num = 0
            while True:
                page_num += 1
                api_path = (
                    'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
                    % (channel_id, video_type, self._PER_PAGE, page_num))
                progress_note = 'Downloading %s JSON page #%d' % (video_type, page_num)
                page = self._call_api(api_path, channel_id, progress_note)
                # Every listed video is delegated to the single-video extractor.
                entries.extend(
                    self.url_result(
                        'https://www.viki.com/videos/%s' % video['id'], 'Viki')
                    for video in page['response'])
                if not page['pagination']['next']:
                    break

        return self.playlist_result(
            entries, channel_id, playlist_title, playlist_description)