jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import json
	5	import time
	6	import hmac
	7	import hashlib
	8	import itertools
	9
	10	from .common import InfoExtractor
	11	from ..utils import (
	12	ExtractorError,
	13	int_or_none,
	14	parse_age_limit,
	15	parse_iso8601,
	16	sanitized_Request,
	17	)
	18
	19
	20	class VikiBaseIE(InfoExtractor):
	21	_VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com\|net\|mx\|jp\|fr)/'
	22	_API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
	23	_API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
	24
	25	_APP = '65535a'
	26	_APP_VERSION = '2.2.5.1428709186'
	27	_APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r\|ulZ,U4LC/SeR)'
	28
	29	_NETRC_MACHINE = 'viki'
	30
	31	_token = None
	32
	33	_ERRORS = {
	34	'geo': 'Sorry, this content is not available in your region.',
	35	'upcoming': 'Sorry, this content is not yet available.',
	36	# 'paywall': 'paywall',
	37	}
	38
	39	def _prepare_call(self, path, timestamp=None, post_data=None):
	40	path += '?' if '?' not in path else '&'
	41	if not timestamp:
	42	timestamp = int(time.time())
	43	query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
	44	if self._token:
	45	query += '&token=%s' % self._token
	46	sig = hmac.new(
	47	self._APP_SECRET.encode('ascii'),
	48	query.encode('ascii'),
	49	hashlib.sha1
	50	).hexdigest()
	51	url = self._API_URL_TEMPLATE % (query, sig)
	52	return sanitized_Request(
	53	url, json.dumps(post_data).encode('utf-8')) if post_data else url
	54
	55	def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
	56	resp = self._download_json(
	57	self._prepare_call(path, timestamp, post_data), video_id, note)
	58
	59	error = resp.get('error')
	60	if error:
	61	if error == 'invalid timestamp':
	62	resp = self._download_json(
	63	self._prepare_call(path, int(resp['current_timestamp']), post_data),
	64	video_id, '%s (retry)' % note)
	65	error = resp.get('error')
	66	if error:
	67	self._raise_error(resp['error'])
	68
	69	return resp
	70
	71	def _raise_error(self, error):
	72	raise ExtractorError(
	73	'%s returned error: %s' % (self.IE_NAME, error),
	74	expected=True)
	75
	76	def _check_errors(self, data):
	77	for reason, status in data.get('blocking', {}).items():
	78	if status and reason in self._ERRORS:
	79	raise ExtractorError('%s said: %s' % (
	80	self.IE_NAME, self._ERRORS[reason]), expected=True)
	81
	82	def _real_initialize(self):
	83	self._login()
	84
	85	def _login(self):
	86	(username, password) = self._get_login_info()
	87	if username is None:
	88	return
	89
	90	login_form = {
	91	'login_id': username,
	92	'password': password,
	93	}
	94
	95	login = self._call_api(
	96	'sessions.json', None,
	97	'Logging in as %s' % username, post_data=login_form)
	98
	99	self._token = login.get('token')
	100	if not self._token:
	101	self.report_warning('Unable to get session token, login has probably failed')
	102
	103	@staticmethod
	104	def dict_selection(dict_obj, preferred_key):
	105	if preferred_key in dict_obj:
	106	return dict_obj.get(preferred_key)
	107
	108	filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()]))
	109	return filtered_dict[0] if filtered_dict else None
	110
	111
	112	class VikiIE(VikiBaseIE):
	113	IE_NAME = 'viki'
	114	_VALID_URL = r'%s(?:videos\|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
	115	_TESTS = [{
	116	'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
	117	'info_dict': {
	118	'id': '1023585v',
	119	'ext': 'mp4',
	120	'title': 'Heirs Episode 14',
	121	'uploader': 'SBS',
	122	'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
	123	'upload_date': '20131121',
	124	'age_limit': 13,
	125	},
	126	'skip': 'Blocked in the US',
	127	}, {
	128	# clip
	129	'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
	130	'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
	131	'info_dict': {
	132	'id': '1067139v',
	133	'ext': 'mp4',
	134	'title': "'The Avengers: Age of Ultron' Press Conference",
	135	'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
	136	'duration': 352,
	137	'timestamp': 1430380829,
	138	'upload_date': '20150430',
	139	'uploader': 'Arirang TV',
	140	'like_count': int,
	141	'age_limit': 0,
	142	}
	143	}, {
	144	'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
	145	'info_dict': {
	146	'id': '1048879v',
	147	'ext': 'mp4',
	148	'title': 'Ankhon Dekhi',
	149	'duration': 6512,
	150	'timestamp': 1408532356,
	151	'upload_date': '20140820',
	152	'uploader': 'Spuul',
	153	'like_count': int,
	154	'age_limit': 13,
	155	},
	156	'params': {
	157	# m3u8 download
	158	'skip_download': True,
	159	}
	160	}, {
	161	# episode
	162	'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
	163	'md5': '190f3ef426005ba3a080a63325955bc3',
	164	'info_dict': {
	165	'id': '44699v',
	166	'ext': 'mp4',
	167	'title': 'Boys Over Flowers - Episode 1',
	168	'description': 'md5:52617e4f729c7d03bfd4bcbbb6e946f2',
	169	'duration': 4155,
	170	'timestamp': 1270496524,
	171	'upload_date': '20100405',
	172	'uploader': 'group8',
	173	'like_count': int,
	174	'age_limit': 13,
	175	}
	176	}, {
	177	# youtube external
	178	'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
	179	'md5': '216d1afdc0c64d1febc1e9f2bd4b864b',
	180	'info_dict': {
	181	'id': '50562v',
	182	'ext': 'mp4',
	183	'title': 'Poor Nastya [COMPLETE] - Episode 1',
	184	'description': '',
	185	'duration': 607,
	186	'timestamp': 1274949505,
	187	'upload_date': '20101213',
	188	'uploader': 'ad14065n',
	189	'uploader_id': 'ad14065n',
	190	'like_count': int,
	191	'age_limit': 13,
	192	}
	193	}, {
	194	'url': 'http://www.viki.com/player/44699v',
	195	'only_matching': True,
	196	}, {
	197	# non-English description
	198	'url': 'http://www.viki.com/videos/158036v-love-in-magic',
	199	'md5': '1713ae35df5a521b31f6dc40730e7c9c',
	200	'info_dict': {
	201	'id': '158036v',
	202	'ext': 'mp4',
	203	'uploader': 'I Planet Entertainment',
	204	'upload_date': '20111122',
	205	'timestamp': 1321985454,
	206	'description': 'md5:44b1e46619df3a072294645c770cef36',
	207	'title': 'Love In Magic',
	208	'age_limit': 13,
	209	},
	210	}]
	211
	212	def _real_extract(self, url):
	213	video_id = self._match_id(url)
	214
	215	video = self._call_api(
	216	'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
	217
	218	self._check_errors(video)
	219
	220	title = self.dict_selection(video.get('titles', {}), 'en')
	221	if not title:
	222	title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
	223	container_titles = video.get('container', {}).get('titles', {})
	224	container_title = self.dict_selection(container_titles, 'en')
	225	title = '%s - %s' % (container_title, title)
	226
	227	description = self.dict_selection(video.get('descriptions', {}), 'en')
	228
	229	duration = int_or_none(video.get('duration'))
	230	timestamp = parse_iso8601(video.get('created_at'))
	231	uploader = video.get('author')
	232	like_count = int_or_none(video.get('likes', {}).get('count'))
	233	age_limit = parse_age_limit(video.get('rating'))
	234
	235	thumbnails = []
	236	for thumbnail_id, thumbnail in video.get('images', {}).items():
	237	thumbnails.append({
	238	'id': thumbnail_id,
	239	'url': thumbnail.get('url'),
	240	})
	241
	242	subtitles = {}
	243	for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
	244	subtitles[subtitle_lang] = [{
	245	'ext': subtitles_format,
	246	'url': self._prepare_call(
	247	'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
	248	} for subtitles_format in ('srt', 'vtt')]
	249
	250	result = {
	251	'id': video_id,
	252	'title': title,
	253	'description': description,
	254	'duration': duration,
	255	'timestamp': timestamp,
	256	'uploader': uploader,
	257	'like_count': like_count,
	258	'age_limit': age_limit,
	259	'thumbnails': thumbnails,
	260	'subtitles': subtitles,
	261	}
	262
	263	streams = self._call_api(
	264	'videos/%s/streams.json' % video_id, video_id,
	265	'Downloading video streams JSON')
	266
	267	if 'external' in streams:
	268	result.update({
	269	'_type': 'url_transparent',
	270	'url': streams['external']['url'],
	271	})
	272	return result
	273
	274	formats = []
	275	for format_id, stream_dict in streams.items():
	276	height = int_or_none(self._search_regex(
	277	r'^(\d+)[pP]$', format_id, 'height', default=None))
	278	for protocol, format_dict in stream_dict.items():
	279	if format_id == 'm3u8':
	280	formats.extend(self._extract_m3u8_formats(
	281	format_dict['url'], video_id, 'mp4', 'm3u8_native',
	282	m3u8_id='m3u8-%s' % protocol, fatal=False))
	283	else:
	284	formats.append({
	285	'url': format_dict['url'],
	286	'format_id': '%s-%s' % (format_id, protocol),
	287	'height': height,
	288	})
	289	self._sort_formats(formats)
	290
	291	result['formats'] = formats
	292	return result
	293
	294
	295	class VikiChannelIE(VikiBaseIE):
	296	IE_NAME = 'viki:channel'
	297	_VALID_URL = r'%s(?:tv\|news\|movies\|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE
	298	_TESTS = [{
	299	'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
	300	'info_dict': {
	301	'id': '50c',
	302	'title': 'Boys Over Flowers',
	303	'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
	304	},
	305	'playlist_count': 70,
	306	}, {
	307	'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
	308	'info_dict': {
	309	'id': '1354c',
	310	'title': 'Poor Nastya [COMPLETE]',
	311	'description': 'md5:05bf5471385aa8b21c18ad450e350525',
	312	},
	313	'playlist_count': 127,
	314	}, {
	315	'url': 'http://www.viki.com/news/24569c-showbiz-korea',
	316	'only_matching': True,
	317	}, {
	318	'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
	319	'only_matching': True,
	320	}, {
	321	'url': 'http://www.viki.com/artists/2141c-shinee',
	322	'only_matching': True,
	323	}]
	324
	325	_PER_PAGE = 25
	326
	327	def _real_extract(self, url):
	328	channel_id = self._match_id(url)
	329
	330	channel = self._call_api(
	331	'containers/%s.json' % channel_id, channel_id,
	332	'Downloading channel JSON')
	333
	334	self._check_errors(channel)
	335
	336	title = self.dict_selection(channel['titles'], 'en')
	337
	338	description = self.dict_selection(channel['descriptions'], 'en')
	339
	340	entries = []
	341	for video_type in ('episodes', 'clips', 'movies'):
	342	for page_num in itertools.count(1):
	343	page = self._call_api(
	344	'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
	345	% (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
	346	'Downloading %s JSON page #%d' % (video_type, page_num))
	347	for video in page['response']:
	348	video_id = video['id']
	349	entries.append(self.url_result(
	350	'http://www.viki.com/videos/%s' % video_id, 'Viki'))
	351	if not page['pagination']['next']:
	352	break
	353
	354	return self.playlist_result(entries, channel_id, title, description)