]>
Commit | Line | Data |
---|---|---|
c6391cd5 YCH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | from .common import InfoExtractor | |
2fe1b5bd YCH |
6 | from ..utils import ( |
7 | ExtractorError, | |
8 | unsmuggle_url, | |
9 | ) | |
c6391cd5 YCH |
10 | from ..compat import ( |
11 | compat_parse_qs, | |
12 | compat_urlparse, | |
13 | ) | |
14 | ||
15 | ||
class SenateISVPIE(InfoExtractor):
    """Extractor for senate.gov Integrated Senate Video Player (ISVP) URLs.

    The player URL carries everything needed in its query string:
    ``filename`` (the video id, optionally with a ``.mp4`` suffix), ``type``
    (``arch`` selects the single-file archive path, anything else the
    f4m/m3u8 manifest path), ``comm`` (the committee name) and, optionally,
    ``poster`` (used as the thumbnail).
    """

    # Each entry is [committee, stream number, base URL]. The 'arch'
    # pseudo-committee is looked up whenever the URL has type=arch,
    # regardless of the actual committee in the query string.
    _COMM_MAP = [
        ['ag', '76440', 'http://ag-f.akamaihd.net'],
        ['aging', '76442', 'http://aging-f.akamaihd.net'],
        ['approps', '76441', 'http://approps-f.akamaihd.net'],
        ['armed', '76445', 'http://armed-f.akamaihd.net'],
        ['banking', '76446', 'http://banking-f.akamaihd.net'],
        ['budget', '76447', 'http://budget-f.akamaihd.net'],
        ['cecc', '76486', 'http://srs-f.akamaihd.net'],
        ['commerce', '80177', 'http://commerce1-f.akamaihd.net'],
        ['csce', '75229', 'http://srs-f.akamaihd.net'],
        ['dpc', '76590', 'http://dpc-f.akamaihd.net'],
        ['energy', '76448', 'http://energy-f.akamaihd.net'],
        ['epw', '76478', 'http://epw-f.akamaihd.net'],
        ['ethics', '76449', 'http://ethics-f.akamaihd.net'],
        ['finance', '76450', 'http://finance-f.akamaihd.net'],
        ['foreign', '76451', 'http://foreign-f.akamaihd.net'],
        ['govtaff', '76453', 'http://govtaff-f.akamaihd.net'],
        ['help', '76452', 'http://help-f.akamaihd.net'],
        ['indian', '76455', 'http://indian-f.akamaihd.net'],
        ['intel', '76456', 'http://intel-f.akamaihd.net'],
        ['intlnarc', '76457', 'http://intlnarc-f.akamaihd.net'],
        ['jccic', '85180', 'http://jccic-f.akamaihd.net'],
        ['jec', '76458', 'http://jec-f.akamaihd.net'],
        ['judiciary', '76459', 'http://judiciary-f.akamaihd.net'],
        ['rpc', '76591', 'http://rpc-f.akamaihd.net'],
        ['rules', '76460', 'http://rules-f.akamaihd.net'],
        ['saa', '76489', 'http://srs-f.akamaihd.net'],
        ['smbiz', '76461', 'http://smbiz-f.akamaihd.net'],
        ['srs', '75229', 'http://srs-f.akamaihd.net'],
        ['uscc', '76487', 'http://srs-f.akamaihd.net'],
        ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
        ['arch', '', 'http://ussenate-f.akamaihd.net/']
    ]
    _IE_NAME = 'senate.gov'
    _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
    _TESTS = [{
        'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
        'info_dict': {
            'id': 'judiciary031715',
            'ext': 'mp4',
            'title': 'Integrated Senate Video Player',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
        'info_dict': {
            'id': 'commerce011514',
            'ext': 'mp4',
            'title': 'Integrated Senate Video Player'
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
        # checksum differs each time
        'info_dict': {
            'id': 'intel090613',
            'ext': 'mp4',
            'title': 'Integrated Senate Video Player'
        }
    }, {
        # From http://www.c-span.org/video/?96791-1
        'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715',
        'only_matching': True,
    }]

    @staticmethod
    def _search_iframe_url(webpage):
        """Return the first senate.gov ISVP iframe URL found in ``webpage``.

        Returns None when the page embeds no such iframe.
        """
        mobj = re.search(
            r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]",
            webpage)
        if mobj:
            return mobj.group('url')

    def _get_info_for_comm(self, committee):
        """Look up ``committee`` in ``_COMM_MAP``.

        Returns the ``[stream_num, domain]`` pair of the matching entry, or
        None when the committee is not listed.
        """
        for entry in self._COMM_MAP:
            if entry[0] == committee:
                return entry[1:]

    def _real_extract(self, url):
        """Build the info dict (id, title, formats, thumbnail) for an ISVP URL.

        Raises ExtractorError when the query string lacks ``filename``,
        ``type`` or ``comm``, or when the committee is not in ``_COMM_MAP``.
        """
        url, smuggled_data = unsmuggle_url(url, {})

        qs = compat_parse_qs(re.match(self._VALID_URL, url).group('qs'))
        if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
            raise ExtractorError('Invalid URL', expected=True)

        # Strip only a literal ".mp4" suffix; the dot must be escaped so that
        # ids merely ending in "<any char>mp4" are left intact.
        video_id = re.sub(r'\.mp4$', '', qs['filename'][0])

        webpage = self._download_webpage(url, video_id)

        # A title smuggled in by the embedding extractor wins over the
        # page's own <title>.
        if smuggled_data.get('force_title'):
            title = smuggled_data['force_title']
        else:
            title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
        poster = qs.get('poster')
        thumbnail = poster[0] if poster else None

        video_type = qs['type'][0]
        committee = video_type if video_type == 'arch' else qs['comm'][0]
        comm_info = self._get_info_for_comm(committee)
        if comm_info is None:
            # Fail with a meaningful message instead of a TypeError from
            # unpacking None below.
            raise ExtractorError('Unknown committee: %s' % committee)
        stream_num, domain = comm_info

        formats = []
        if video_type == 'arch':
            filename = video_id if '.' in video_id else video_id + '.mp4'
            formats = [{
                # All parameters in the query string are necessary to prevent a 403 error
                'url': compat_urlparse.urljoin(domain, filename) + '?v=3.1.0&fp=&r=&g=',
            }]
        else:
            hdcore_sign = 'hdcore=3.1.0'
            url_params = (domain, video_id, stream_num)
            f4m_url = '%s/z/%s_1@%s/manifest.f4m?' % url_params + hdcore_sign
            m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params
            for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'):
                # URLs without the extra param induce a 404 error
                entry.update({'extra_param_to_segment_url': hdcore_sign})
                formats.append(entry)
            for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'):
                # Append the "-p"/"-b" playlist tag, when the playlist URL
                # has one, to the format id.
                mobj = re.search(r'(?P<tag>(?:-p|-b))\.m3u8', entry['url'])
                if mobj:
                    entry['format_id'] += mobj.group('tag')
                formats.append(entry)

            self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
        }