]>
Commit | Line | Data |
---|---|---|
1b124d19 PH |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
1b124d19 PH |
6 | |
7 | ||
class ParliamentLiveUKIE(InfoExtractor):
    """Information extractor for parliamentlive.tv player pages.

    The player page embeds a media player whose ``src`` points at an ASX
    metadata document; the actual stream URL is read from that document.
    """

    IE_NAME = 'parliamentlive.tv'
    IE_DESC = 'UK parliament videos'
    # The meetingId query parameter may be preceded by other parameters.
    _VALID_URL = r'https?://www\.parliamentlive\.tv/Main/Player\.aspx\?(?:[^&]+&)*?meetingId=(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://www.parliamentlive.tv/Main/Player.aspx?meetingId=15121&player=windowsmedia',
        'info_dict': {
            'id': '15121',
            'ext': 'asf',
            'title': 'hoc home affairs committee, 18 mar 2014.pm',
            'description': 'md5:033b3acdf83304cd43946b2d5e5798d1',
        },
        'params': {
            'skip_download': True,  # Requires mplayer (mms)
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single player page.

        Steps:
          1. Take the meeting id from the URL (used as the video id).
          2. Download the player page and locate the ``src`` of the embed
             element named ``MediaPlayer``; fetch it as XML (ASX metadata).
          3. Use the ``HREF`` attribute of the first ``REF`` element in the
             ASX document as the media URL.
          4. Read the title from the ``player.setClipDetails(...)`` call and
             the description from the ``WitnessInfo`` span.

        Returns a dict with the keys ``id``, ``ext``, ``url``, ``title`` and
        ``description``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        asx_url = self._html_search_regex(
            r'embed.*?src="([^"]+)" name="MediaPlayer"', webpage,
            'metadata URL')
        asx = self._download_xml(asx_url, video_id, 'Downloading ASX metadata')
        video_url = asx.find('.//REF').attrib['HREF']

        # The pattern skips the first two arguments of setClipDetails() and
        # captures the next two quoted strings together, including the
        # `",<whitespace>"` separator that sits between them.
        raw_title = self._search_regex(
            r'''(?x)player\.setClipDetails\(
                (?:(?:[0-9]+|"[^"]+"),\s*){2}
                "([^"]+",\s*"[^"]+)"
                ''',
            webpage, 'title')
        # Collapse the captured separator into ', '.  The pattern above
        # allows any amount of whitespace after the comma, so normalise with
        # the same `",\s*"` shape instead of the single literal '", "'
        # (which would leave stray quotes in the title for other spacings).
        title = re.sub(r'",\s*"', ', ', raw_title)

        description = self._html_search_regex(
            r'(?s)<span id="MainContentPlaceHolder_CaptionsBlock_WitnessInfo">(.*?)</span>',
            webpage, 'description')

        return {
            'id': video_id,
            # The extension is fixed; it is not derived from the stream URL.
            'ext': 'asf',
            'url': video_url,
            'title': title,
            'description': description,
        }