jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	import re
	2	import xml.etree.ElementTree
	3	import operator
	4
	5	from .common import InfoExtractor
	6
	7
	8	class MetacriticIE(InfoExtractor):
	9	_VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
	10
	11	_TEST = {
	12	u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
	13	u'file': u'3698222.mp4',
	14	u'info_dict': {
	15	u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
	16	u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
	17	u'duration': 221,
	18	},
	19	}
	20
	21	def _real_extract(self, url):
	22	mobj = re.match(self._VALID_URL, url)
	23	video_id = mobj.group('id')
	24	webpage = self._download_webpage(url, video_id)
	25	# The xml is not well formatted, there are raw '&'
	26	info_xml = self._download_webpage('http://www.metacritic.com/video_data?video=' + video_id,
	27	video_id, u'Downloading info xml').replace('&', '&')
	28	info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
	29
	30	clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
	31	formats = []
	32	for videoFile in clip.findall('httpURI/videoFile'):
	33	rate_str = videoFile.find('rate').text
	34	video_url = videoFile.find('filePath').text
	35	formats.append({
	36	'url': video_url,
	37	'ext': 'mp4',
	38	'format_id': rate_str,
	39	'rate': int(rate_str),
	40	})
	41	formats.sort(key=operator.itemgetter('rate'))
	42
	43	description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
	44	webpage, u'description', flags=re.DOTALL)
	45
	46	info = {
	47	'id': video_id,
	48	'title': clip.find('title').text,
	49	'formats': formats,
	50	'description': description,
	51	'duration': int(clip.find('duration').text),
	52	}
	53	# TODO: Remove when #980 has been merged
	54	info.update(formats[-1])
	55	return info