]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/metacritic.py
Merge branch 'abc' of https://github.com/adrianheine/youtube-dl into adrianheine-abc
[yt-dlp.git] / youtube_dlc / extractor / metacritic.py
CommitLineData
9c631286
PH
1from __future__ import unicode_literals
2
7e772752 3import re
7e772752
JMF
4
5from .common import InfoExtractor
18258362 6from ..utils import (
5aafe895 7 fix_xml_ampersands,
18258362 8)
7e772752
JMF
9
10
class MetacriticIE(InfoExtractor):
    """Information extractor for trailer pages on metacritic.com.

    Matches URLs of the form ``.../trailers/<numeric id>``.  The stream
    locations, title and duration are read from the site's ``video_data``
    XML document; the description is scraped from the trailer web page.
    """

    _VALID_URL = r'https?://(?:www\.)?metacritic\.com/.+?/trailers/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
        'info_dict': {
            'id': '3698222',
            'ext': 'mp4',
            'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            'duration': 221,
        },
        'skip': 'Not providing trailers anymore',
    }, {
        'url': 'http://www.metacritic.com/game/playstation-4/tales-from-the-borderlands-a-telltale-game-series/trailers/5740315',
        'info_dict': {
            'id': '5740315',
            'ext': 'mp4',
            'title': 'Tales from the Borderlands - Finale: The Vault of the Traveler',
            'description': 'In the final episode of the season, all hell breaks loose. Jack is now in control of Helios\' systems, and he\'s ready to reclaim his rightful place as king of Hyperion (with or without you).',
            'duration': 114,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the trailer addressed by *url*.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``description`` and ``duration``.  ``description`` is optional and
        may be ``None`` when it cannot be located on the page.

        Raises ExtractorError when the video data XML contains no clip
        whose id equals the id taken from the URL.
        """
        # Imported here rather than at module level so that this block does
        # not rely on any change to the file's import list.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        # The xml is not well formatted, there are raw '&'
        info = self._download_xml(
            'http://www.metacritic.com/video_data?video=' + video_id,
            video_id, 'Downloading info xml',
            transform_source=fix_xml_ampersands)

        # The playlist can hold several clips; select the one for this id.
        # A default of None keeps a bare StopIteration from escaping, and
        # findtext() tolerates <clip> elements that lack an <id> child.
        clip = next(
            (c for c in info.findall('playList/clip')
             if c.findtext('id') == video_id),
            None)
        if clip is None:
            raise ExtractorError(
                'Unable to find clip %s in the video data' % video_id)

        formats = []
        for video_file in clip.findall('httpURI/videoFile'):
            rate_str = video_file.find('rate').text
            video_url = video_file.find('filePath').text
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                # The <rate> text serves both as the format label and,
                # converted to an integer, as the 'tbr' value.
                'format_id': rate_str,
                'tbr': int(rate_str),
            })
        self._sort_formats(formats)

        # The description is optional metadata: do not abort the whole
        # extraction when the page markup no longer matches.
        description = self._html_search_regex(
            r'<b>Description:</b>(.*?)</p>',
            webpage, 'description', fatal=False, flags=re.DOTALL)

        return {
            'id': video_id,
            'title': clip.find('title').text,
            'formats': formats,
            'description': description,
            'duration': int(clip.find('duration').text),
        }