]>
Commit | Line | Data |
---|---|---|
fbaaad49 JMF |
1 | import re |
2 | import json | |
cfe50f04 | 3 | import xml.etree.ElementTree |
fbaaad49 JMF |
4 | |
5 | from .common import InfoExtractor | |
cfe50f04 JMF |
6 | from ..utils import ( |
7 | compat_urllib_parse, | |
8 | ) | |
fbaaad49 JMF |
9 | |
class BrightcoveIE(InfoExtractor):
    """Extractor for Brightcove player URLs (single videos and playlists).

    A URL whose query string carries a numeric ``videoPlayer`` value is
    handled as a single video; otherwise the ``playerKey`` value is used to
    fetch a playlist.
    """
    _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
    _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
    _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'

    # There is a test for Brightcove in GenericIE, that way we test both the download
    # and the detection of videos, and we don't have to find a URL that is always valid

    @staticmethod
    def _find_param(object_doc, name):
        """Return the first direct <param> child of object_doc whose 'name'
        attribute equals name, or None when there is no such child.

        Implemented with plain ElementTree calls so that this module only
        relies on names it actually imports.
        """
        for param in object_doc.findall('./param'):
            if param.attrib.get('name') == name:
                return param
        return None

    @classmethod
    def _build_brighcove_url(cls, object_str):
        """
        Build a Brightcove url from a xml string containing
        <object class="BrightcoveExperience">{params}</object>

        The 'id' attribute of the <object> and its 'playerID' param are
        required; the 'playerKey' and '@videoPlayer' params are copied into
        the query string only when present.

        Raises KeyError if the <object> has no 'class' or 'id' attribute,
        AssertionError if its class does not mention BrightcoveExperience,
        and AttributeError if there is no 'playerID' param.
        """
        object_doc = xml.etree.ElementTree.fromstring(object_str)
        assert u'BrightcoveExperience' in object_doc.attrib['class']
        params = {'flashID': object_doc.attrib['id'],
                  'playerID': cls._find_param(object_doc, 'playerID').attrib['value'],
                  }
        playerKey = cls._find_param(object_doc, 'playerKey')
        # Not all pages define this value
        if playerKey is not None:
            params['playerKey'] = playerKey.attrib['value']
        videoPlayer = cls._find_param(object_doc, '@videoPlayer')
        if videoPlayer is not None:
            params['@videoPlayer'] = videoPlayer.attrib['value']
        data = compat_urllib_parse.urlencode(params)
        return cls._FEDERATED_URL_TEMPLATE % data

    def _real_extract(self, url):
        """Dispatch url to the single-video or the playlist code path.

        The query string captured by _VALID_URL is searched for a numeric
        'videoPlayer' value; if there is none, the 'playerKey' value is
        looked up and treated as a playlist identifier.
        """
        mobj = re.match(self._VALID_URL, url)
        query = mobj.group('query')

        m_video_id = re.search(r'videoPlayer=(\d+)', query)
        if m_video_id is not None:
            video_id = m_video_id.group(1)
            return self._get_video_info(video_id, query)
        else:
            player_key = self._search_regex(r'playerKey=(.+?)(&|$)', query, 'playlist_id')
            return self._get_playlist_info(player_key)

    def _get_video_info(self, video_id, query):
        """Download the federated player page for query and return the info
        dict of the video described by its embedded 'experienceJSON' object.
        """
        request_url = self._FEDERATED_URL_TEMPLATE % query
        webpage = self._download_webpage(request_url, video_id)

        self.report_extraction(video_id)
        info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
        info = json.loads(info)['data']
        video_info = info['programmedContent']['videoPlayer']['mediaDTO']

        return self._extract_video_info(video_info)

    def _get_playlist_info(self, player_key):
        """Download the playlist JSON for player_key and return a playlist
        result with one info dict per entry of its 'videoDTOs' list.
        """
        playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
                                               player_key, u'Downloading playlist information')

        playlist_info = json.loads(playlist_info)['videoList']
        videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]

        return self.playlist_result(videos, playlist_id=playlist_info['id'],
                                    playlist_title=playlist_info['mediaCollectionDTO']['displayName'])

    def _extract_video_info(self, video_info):
        """Convert one Brightcove video description dict into an info dict.

        The rendition with the largest 'size' supplies the download url.
        'id', 'displayName' and 'renditions' are required keys; description,
        thumbnail and uploader are None when the source dict lacks them.

        Raises IndexError if 'renditions' is empty.
        """
        renditions = video_info['renditions']
        # Ascending sort, so the last element is the largest rendition
        renditions = sorted(renditions, key=lambda r: r['size'])
        best_format = renditions[-1]

        return {'id': video_info['id'],
                'title': video_info['displayName'],
                'url': best_format['defaultURL'],
                'ext': 'mp4',
                'description': video_info.get('shortDescription'),
                'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
                'uploader': video_info.get('publisherName'),
                }