]>
Commit | Line | Data |
---|---|---|
ca9e7922 PH |
1 | from __future__ import unicode_literals |
2 | ||
3 | import json | |
aa0c8739 JMF |
4 | import re |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
ca9e7922 | 8 | unescapeHTML, |
aa0c8739 JMF |
9 | ) |
10 | ||
ca9e7922 | 11 | |
class CSpanIE(InfoExtractor):
    """Extractor for C-SPAN program pages hosted on c-spanvideo.org."""

    # The ``name`` group captures everything that follows ``/program/``.
    _VALID_URL = r'http://(?:www\.)?c-spanvideo\.org/program/(?P<name>.*)'
    IE_DESC = 'C-SPAN'
    _TEST = {
        'url': 'http://www.c-spanvideo.org/program/HolderonV',
        'file': '315139.mp4',
        'md5': '8e44ce11f0f725527daccc453f553eb0',
        'info_dict': {
            'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
            'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
        },
        'skip': 'Regularly fails on travis, for unknown reasons',
    }

    def _real_extract(self, url):
        """Return the info dictionary for the program page at *url*.

        The program page is downloaded first and searched for the video id,
        title and description. The id is then used to request the player's
        JSON description, from which the media URL of the first listed file
        is taken.

        The returned dictionary has the keys ``id``, ``title``, ``url``,
        ``description`` and ``thumbnail``.
        """
        # The program name from the URL is only used as the display id for
        # the first download; the real video id comes from the page itself.
        page_name = re.match(self._VALID_URL, url).group('name')
        webpage = self._download_webpage(url, page_name)
        video_id = self._search_regex(
            r'prog(?:ram)?id=(.*?)&', webpage, 'video id')

        title = self._html_search_regex(
            r'<!-- title -->\n\s*<h1[^>]*>(.*?)</h1>', webpage, 'title')
        description = self._og_search_description(webpage)

        # Query the player endpoint for the JSON document describing the
        # available media files.
        info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
        data = json.loads(self._download_webpage(
            info_url, video_id, 'Downloading video info'))

        # Only the first entry of the file list is used; its path is passed
        # through unescapeHTML before being returned.
        first_file = data['video']['files'][0]
        video_url = unescapeHTML(first_file['path']['#text'])

        # Looked up last so the page searches run in the same order as before.
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
        }