jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import random
	5	import re
	6	import string
	7	import time
	8
	9	from .common import InfoExtractor
	10	from ..utils import (
	11	ExtractorError,
	12	get_element_by_class,
	13	js_to_json,
	14	str_or_none,
	15	strip_jsonp,
	16	)
	17
	18
	19	class YoukuIE(InfoExtractor):
	20	IE_NAME = 'youku'
	21	IE_DESC = '优酷'
	22	_VALID_URL = r'''(?x)
	23	(?:
	24	https?://(
	25	(?:v\|player)\.youku\.com/(?:v_show/id_\|player\.php/sid/)\|
	26	video\.tudou\.com/v/)\|
	27	youku:)
	28	(?P<id>[A-Za-z0-9]+)(?:\.html\|/v\.swf\|)
	29	'''
	30
	31	_TESTS = [{
	32	# MD5 is unstable
	33	'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
	34	'info_dict': {
	35	'id': 'XMTc1ODE5Njcy',
	36	'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
	37	'ext': 'mp4',
	38	'duration': 74.73,
	39	'thumbnail': r're:^https?://.*',
	40	'uploader': '。躲猫猫、',
	41	'uploader_id': '36017967',
	42	'uploader_url': 'http://i.youku.com/u/UMTQ0MDcxODY4',
	43	'tags': list,
	44	}
	45	}, {
	46	'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
	47	'only_matching': True,
	48	}, {
	49	'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
	50	'info_dict': {
	51	'id': 'XODgxNjg1Mzk2',
	52	'ext': 'mp4',
	53	'title': '武媚娘传奇 85',
	54	'duration': 1999.61,
	55	'thumbnail': r're:^https?://.*',
	56	'uploader': '疯狂豆花',
	57	'uploader_id': '62583473',
	58	'uploader_url': 'http://i.youku.com/u/UMjUwMzMzODky',
	59	'tags': list,
	60	},
	61	}, {
	62	'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
	63	'info_dict': {
	64	'id': 'XMTI1OTczNDM5Mg',
	65	'ext': 'mp4',
	66	'title': '花千骨 04',
	67	'duration': 2363,
	68	'thumbnail': r're:^https?://.*',
	69	'uploader': '放剧场-花千骨',
	70	'uploader_id': '772849359',
	71	'uploader_url': 'http://i.youku.com/u/UMzA5MTM5NzQzNg==',
	72	'tags': list,
	73	},
	74	}, {
	75	'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
	76	'note': 'Video protected with password',
	77	'info_dict': {
	78	'id': 'XNjA1NzA2Njgw',
	79	'ext': 'mp4',
	80	'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
	81	'duration': 7264.5,
	82	'thumbnail': r're:^https?://.*',
	83	'uploader': 'FoxJin1006',
	84	'uploader_id': '322014285',
	85	'uploader_url': 'http://i.youku.com/u/UMTI4ODA1NzE0MA==',
	86	'tags': list,
	87	},
	88	'params': {
	89	'videopassword': '100600',
	90	},
	91	}, {
	92	# /play/get.json contains streams with "channel_type":"tail"
	93	'url': 'http://v.youku.com/v_show/id_XOTUxMzg4NDMy.html',
	94	'info_dict': {
	95	'id': 'XOTUxMzg4NDMy',
	96	'ext': 'mp4',
	97	'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft',
	98	'duration': 702.08,
	99	'thumbnail': r're:^https?://.*',
	100	'uploader': '明月庄主moon',
	101	'uploader_id': '38465621',
	102	'uploader_url': 'http://i.youku.com/u/UMTUzODYyNDg0',
	103	'tags': list,
	104	},
	105	}, {
	106	'url': 'http://video.tudou.com/v/XMjIyNzAzMTQ4NA==.html?f=46177805',
	107	'info_dict': {
	108	'id': 'XMjIyNzAzMTQ4NA',
	109	'ext': 'mp4',
	110	'title': '卡马乔国足开大脚长传冲吊集锦',
	111	'duration': 289,
	112	'thumbnail': r're:^https?://.*',
	113	'uploader': '阿卜杜拉之星',
	114	'uploader_id': '2382249',
	115	'uploader_url': 'http://i.youku.com/u/UOTUyODk5Ng==',
	116	'tags': list,
	117	},
	118	}, {
	119	'url': 'http://video.tudou.com/v/XMjE4ODI3OTg2MA==.html',
	120	'only_matching': True,
	121	}]
	122
	123	@staticmethod
	124	def get_ysuid():
	125	return '%d%s' % (int(time.time()), ''.join([
	126	random.choice(string.ascii_letters) for i in range(3)]))
	127
	128	def get_format_name(self, fm):
	129	_dict = {
	130	'3gp': 'h6',
	131	'3gphd': 'h5',
	132	'flv': 'h4',
	133	'flvhd': 'h4',
	134	'mp4': 'h3',
	135	'mp4hd': 'h3',
	136	'mp4hd2': 'h4',
	137	'mp4hd3': 'h4',
	138	'hd2': 'h2',
	139	'hd3': 'h1',
	140	}
	141	return _dict.get(fm)
	142
	143	def _real_extract(self, url):
	144	video_id = self._match_id(url)
	145
	146	self._set_cookie('youku.com', '__ysuid', self.get_ysuid())
	147	self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
	148
	149	_, urlh = self._download_webpage_handle(
	150	'https://log.mmstat.com/eg.js', video_id, 'Retrieving cna info')
	151	# The etag header is '"foobar"'; let's remove the double quotes
	152	cna = urlh.headers['etag'][1:-1]
	153
	154	# request basic data
	155	basic_data_params = {
	156	'vid': video_id,
	157	'ccode': '0532',
	158	'client_ip': '192.168.1.1',
	159	'utid': cna,
	160	'client_ts': time.time() / 1000,
	161	}
	162
	163	video_password = self.get_param('videopassword')
	164	if video_password:
	165	basic_data_params['password'] = video_password
	166
	167	headers = {
	168	'Referer': url,
	169	}
	170	headers.update(self.geo_verification_headers())
	171	data = self._download_json(
	172	'https://ups.youku.com/ups/get.json', video_id,
	173	'Downloading JSON metadata',
	174	query=basic_data_params, headers=headers)['data']
	175
	176	error = data.get('error')
	177	if error:
	178	error_note = error.get('note')
	179	if error_note is not None and '因版权原因无法观看此视频' in error_note:
	180	raise ExtractorError(
	181	'Youku said: Sorry, this video is available in China only', expected=True)
	182	elif error_note and '该视频被设为私密' in error_note:
	183	raise ExtractorError(
	184	'Youku said: Sorry, this video is private', expected=True)
	185	else:
	186	msg = 'Youku server reported error %i' % error.get('code')
	187	if error_note is not None:
	188	msg += ': ' + error_note
	189	raise ExtractorError(msg)
	190
	191	# get video title
	192	video_data = data['video']
	193	title = video_data['title']
	194
	195	formats = [{
	196	'url': stream['m3u8_url'],
	197	'format_id': self.get_format_name(stream.get('stream_type')),
	198	'ext': 'mp4',
	199	'protocol': 'm3u8_native',
	200	'filesize': int(stream.get('size')),
	201	'width': stream.get('width'),
	202	'height': stream.get('height'),
	203	} for stream in data['stream'] if stream.get('channel_type') != 'tail']
	204	self._sort_formats(formats)
	205
	206	return {
	207	'id': video_id,
	208	'title': title,
	209	'formats': formats,
	210	'duration': video_data.get('seconds'),
	211	'thumbnail': video_data.get('logo'),
	212	'uploader': video_data.get('username'),
	213	'uploader_id': str_or_none(video_data.get('userid')),
	214	'uploader_url': data.get('uploader', {}).get('homepage'),
	215	'tags': video_data.get('tags'),
	216	}
	217
	218
	219	class YoukuShowIE(InfoExtractor):
	220	_VALID_URL = r'https?://list\.youku\.com/show/id_(?P<id>[0-9a-z]+)\.html'
	221	IE_NAME = 'youku:show'
	222
	223	_TESTS = [{
	224	'url': 'http://list.youku.com/show/id_zc7c670be07ff11e48b3f.html',
	225	'info_dict': {
	226	'id': 'zc7c670be07ff11e48b3f',
	227	'title': '花千骨 DVD版',
	228	'description': 'md5:a1ae6f5618571bbeb5c9821f9c81b558',
	229	},
	230	'playlist_count': 50,
	231	}, {
	232	# Episode number not starting from 1
	233	'url': 'http://list.youku.com/show/id_zefbfbd70efbfbd780bef.html',
	234	'info_dict': {
	235	'id': 'zefbfbd70efbfbd780bef',
	236	'title': '超级飞侠3',
	237	'description': 'md5:275715156abebe5ccc2a1992e9d56b98',
	238	},
	239	'playlist_count': 24,
	240	}, {
	241	# Ongoing playlist. The initial page is the last one
	242	'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html',
	243	'only_matching': True,
	244	}, {
	245	# No data-id value.
	246	'url': 'http://list.youku.com/show/id_zefbfbd61237fefbfbdef.html',
	247	'only_matching': True,
	248	}, {
	249	# Wrong number of reload_id.
	250	'url': 'http://list.youku.com/show/id_z20eb4acaf5c211e3b2ad.html',
	251	'only_matching': True,
	252	}]
	253
	254	def _extract_entries(self, playlist_data_url, show_id, note, query):
	255	query['callback'] = 'cb'
	256	playlist_data = self._download_json(
	257	playlist_data_url, show_id, query=query, note=note,
	258	transform_source=lambda s: js_to_json(strip_jsonp(s))).get('html')
	259	if playlist_data is None:
	260	return [None, None]
	261	drama_list = (get_element_by_class('p-drama-grid', playlist_data)
	262	or get_element_by_class('p-drama-half-row', playlist_data))
	263	if drama_list is None:
	264	raise ExtractorError('No episodes found')
	265	video_urls = re.findall(r'<a[^>]+href="([^"]+)"', drama_list)
	266	return playlist_data, [
	267	self.url_result(self._proto_relative_url(video_url, 'http:'), YoukuIE.ie_key())
	268	for video_url in video_urls]
	269
	270	def _real_extract(self, url):
	271	show_id = self._match_id(url)
	272	webpage = self._download_webpage(url, show_id)
	273
	274	entries = []
	275	page_config = self._parse_json(self._search_regex(
	276	r'var\s+PageConfig\s=\s({.+});', webpage, 'page config'),
	277	show_id, transform_source=js_to_json)
	278	first_page, initial_entries = self._extract_entries(
	279	'http://list.youku.com/show/module', show_id,
	280	note='Downloading initial playlist data page',
	281	query={
	282	'id': page_config['showid'],
	283	'tab': 'showInfo',
	284	})
	285	first_page_reload_id = self._html_search_regex(
	286	r'<div[^>]+id="(reload_\d+)', first_page, 'first page reload id')
	287	# The first reload_id has the same items as first_page
	288	reload_ids = re.findall('<li[^>]+data-id="([^"]+)">', first_page)
	289	entries.extend(initial_entries)
	290	for idx, reload_id in enumerate(reload_ids):
	291	if reload_id == first_page_reload_id:
	292	continue
	293	_, new_entries = self._extract_entries(
	294	'http://list.youku.com/show/episode', show_id,
	295	note='Downloading playlist data page %d' % (idx + 1),
	296	query={
	297	'id': page_config['showid'],
	298	'stage': reload_id,
	299	})
	300	if new_entries is not None:
	301	entries.extend(new_entries)
	302	desc = self._html_search_meta('description', webpage, fatal=False)
	303	playlist_title = desc.split(',')[0] if desc else None
	304	detail_li = get_element_by_class('p-intro', webpage)
	305	playlist_description = get_element_by_class(
	306	'intro-more', detail_li) if detail_li else None
	307
	308	return self.playlist_result(
	309	entries, show_id, playlist_title, playlist_description)