]>
Commit | Line | Data |
---|---|---|
ffca4b5c JMF |
1 | import re |
2 | import xml.etree.ElementTree | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import unified_strdate | |
6 | ||
class CanalplusIE(InfoExtractor):
    """Information extractor for canalplus.fr video pages.

    The numeric video id is taken from the ``vid`` query parameter of the
    page URL and looked up in the XML document served at
    ``_VIDEO_INFO_TEMPLATE``.
    """

    _VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P<id>\d+)'
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
    IE_NAME = u'canalplus.fr'

    _TEST = {
        u'url': u'http://www.canalplus.fr/c-divertissement/pid3351-c-le-petit-journal.html?vid=889861',
        u'file': u'889861.flv',
        u'md5': u'590a888158b5f0d6832f84001fbf3e99',
        u'info_dict': {
            u'title': u'Le Petit Journal 20/06/13 - La guerre des drone',
            u'upload_date': u'20130620',
        },
        u'skip': u'Requires rtmpdump'
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video referenced by *url*.

        Returns a dict with the keys ``id``, ``title``, ``url``, ``ext``,
        ``upload_date`` and ``thumbnail``.

        Raises IndexError when the info document has no entry whose ID
        equals the requested video id, or when that entry exposes no
        usable stream URL.  Raises AttributeError when one of the expected
        metadata elements (INFOS, MEDIA, title, date, thumbnail) is
        missing from the entry.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        info_url = self._VIDEO_INFO_TEMPLATE % video_id
        info_page = self._download_webpage(info_url, video_id,
                                           u'Downloading video info')

        self.report_extraction(video_id)
        doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))

        # The document lists several videos; keep the one that was asked for.
        # findtext() yields None for entries without an ID child, so such
        # entries are skipped instead of aborting the lookup.
        matching = [video for video in doc
                    if video.findtext('ID') == video_id]
        video_info = matching[0]
        infos = video_info.find('INFOS')
        media = video_info.find('MEDIA')

        # Candidate streams in increasing order of preference: the last one
        # that is present and carries a URL is used.
        stream_nodes = [media.find('VIDEOS/%s' % tag)
                        for tag in ('BAS_DEBIT', 'HAUT_DEBIT', 'HD')]
        stream_urls = [node.text for node in stream_nodes
                       if node is not None and node.text]
        video_url = stream_urls[-1]

        title = u'%s - %s' % (infos.find('TITRAGE/TITRE').text,
                              infos.find('TITRAGE/SOUS_TITRE').text)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': 'flv',
            'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
            'thumbnail': media.find('IMAGES/GRAND').text,
        }