]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | ExtractorError, | |
8 | unsmuggle_url, | |
9 | ) | |
10 | from ..compat import ( | |
11 | compat_parse_qs, | |
12 | compat_urlparse, | |
13 | ) | |
14 | ||
15 | ||
class SenateISVPIE(InfoExtractor):
    """Extractor for the senate.gov Integrated Senate Video Player (ISVP).

    Handles ``http://www.senate.gov/isvp?...`` URLs whose query string
    carries the ``filename``, ``type`` and ``comm`` parameters.
    """

    # Each entry is [committee key, stream number, base URL].  The key is
    # compared against the ``comm`` query parameter, except for the final
    # 'arch' entry, which is selected whenever ``type=arch`` (whatever the
    # committee is) and carries no stream number.
    _COMM_MAP = [
        ["ag", "76440", "http://ag-f.akamaihd.net"],
        ["aging", "76442", "http://aging-f.akamaihd.net"],
        ["approps", "76441", "http://approps-f.akamaihd.net"],
        ["armed", "76445", "http://armed-f.akamaihd.net"],
        ["banking", "76446", "http://banking-f.akamaihd.net"],
        ["budget", "76447", "http://budget-f.akamaihd.net"],
        ["cecc", "76486", "http://srs-f.akamaihd.net"],
        ["commerce", "80177", "http://commerce1-f.akamaihd.net"],
        ["csce", "75229", "http://srs-f.akamaihd.net"],
        ["dpc", "76590", "http://dpc-f.akamaihd.net"],
        ["energy", "76448", "http://energy-f.akamaihd.net"],
        ["epw", "76478", "http://epw-f.akamaihd.net"],
        ["ethics", "76449", "http://ethics-f.akamaihd.net"],
        ["finance", "76450", "http://finance-f.akamaihd.net"],
        ["foreign", "76451", "http://foreign-f.akamaihd.net"],
        ["govtaff", "76453", "http://govtaff-f.akamaihd.net"],
        ["help", "76452", "http://help-f.akamaihd.net"],
        ["indian", "76455", "http://indian-f.akamaihd.net"],
        ["intel", "76456", "http://intel-f.akamaihd.net"],
        ["intlnarc", "76457", "http://intlnarc-f.akamaihd.net"],
        ["jccic", "85180", "http://jccic-f.akamaihd.net"],
        ["jec", "76458", "http://jec-f.akamaihd.net"],
        ["judiciary", "76459", "http://judiciary-f.akamaihd.net"],
        ["rpc", "76591", "http://rpc-f.akamaihd.net"],
        ["rules", "76460", "http://rules-f.akamaihd.net"],
        ["saa", "76489", "http://srs-f.akamaihd.net"],
        ["smbiz", "76461", "http://smbiz-f.akamaihd.net"],
        ["srs", "75229", "http://srs-f.akamaihd.net"],
        ["uscc", "76487", "http://srs-f.akamaihd.net"],
        ["vetaff", "76462", "http://vetaff-f.akamaihd.net"],
        ["arch", "", "http://ussenate-f.akamaihd.net/"]
    ]
    _IE_NAME = 'senate.gov'
    _VALID_URL = r'http://www\.senate\.gov/isvp/?\?(?P<qs>.+)'
    _TESTS = [{
        'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
        'info_dict': {
            'id': 'judiciary031715',
            'ext': 'flv',
            'title': 'Integrated Senate Video Player',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
        }
    }, {
        'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
        'info_dict': {
            'id': 'commerce011514',
            'ext': 'flv',
            'title': 'Integrated Senate Video Player'
        }
    }, {
        'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
        # checksum differs each time
        'info_dict': {
            'id': 'intel090613',
            'ext': 'mp4',
            'title': 'Integrated Senate Video Player'
        }
    }, {
        # From http://www.c-span.org/video/?96791-1
        'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715',
        'only_matching': True,
    }]

    @staticmethod
    def _search_iframe_url(webpage):
        """Return the ISVP player URL embedded in ``webpage``, if any.

        Looks for the first ``<iframe>`` whose ``src`` points at
        ``http://www.senate.gov/isvp?...`` and returns that URL; returns
        None when no such iframe is present.
        """
        mobj = re.search(
            r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]",
            webpage)
        if mobj:
            return mobj.group('url')
        return None

    def _get_info_for_comm(self, committee):
        """Look up ``committee`` in ``_COMM_MAP``.

        Returns the ``[stream number, base URL]`` pair of the first entry
        whose key equals ``committee``, or None if the key is unknown.
        """
        for entry in self._COMM_MAP:
            if entry[0] == committee:
                return entry[1:]
        return None

    def _real_extract(self, url):
        """Build the info dict for an ISVP URL.

        The video id is the ``filename`` query parameter without a trailing
        ``.mp4``.  For ``type=arch`` a single direct MP4 URL is produced;
        for any other type the f4m and m3u8 manifests are both extracted.

        Raises ExtractorError (expected) when a required query parameter is
        missing or when the committee is not listed in ``_COMM_MAP``.
        """
        url, smuggled_data = unsmuggle_url(url, {})

        qs = compat_parse_qs(re.match(self._VALID_URL, url).group('qs'))
        if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
            raise ExtractorError('Invalid URL', expected=True)

        # The dot is escaped so only a literal ".mp4" suffix is removed.
        video_id = re.sub(r'\.mp4$', '', qs['filename'][0])

        webpage = self._download_webpage(url, video_id)

        # A title smuggled in with the URL takes precedence over the
        # page's own <title>.
        if smuggled_data.get('force_title'):
            title = smuggled_data['force_title']
        else:
            title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, video_id)
        poster = qs.get('poster')
        thumbnail = poster[0] if poster else None

        video_type = qs['type'][0]
        # Archived videos all use the shared 'arch' entry.
        committee = video_type if video_type == 'arch' else qs['comm'][0]
        comm_info = self._get_info_for_comm(committee)
        if not comm_info:
            # Report an unknown committee cleanly instead of failing with
            # a TypeError while unpacking None.
            raise ExtractorError(
                'Unknown committee: %s' % committee, expected=True)
        stream_num, domain = comm_info

        formats = []
        if video_type == 'arch':
            filename = video_id if '.' in video_id else video_id + '.mp4'
            formats = [{
                # All parameters in the query string are necessary to prevent a 403 error
                'url': compat_urlparse.urljoin(domain, filename) + '?v=3.1.0&fp=&r=&g=',
            }]
        else:
            hdcore_sign = '?hdcore=3.1.0'
            url_params = (domain, video_id, stream_num)
            f4m_url = '%s/z/%s_1@%s/manifest.f4m' % url_params + hdcore_sign
            m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params
            for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'):
                # URLs without the extra param induce a 404 error
                entry.update({'extra_param_to_segment_url': hdcore_sign})
                formats.append(entry)
            for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'):
                # Append the "-p" / "-b" variant marker from the playlist
                # URL to the format id when one is present.
                mobj = re.search(r'(?P<tag>(?:-p|-b))\.m3u8', entry['url'])
                if mobj:
                    entry['format_id'] += mobj.group('tag')
                formats.append(entry)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
        }