]>
Commit | Line | Data |
---|---|---|
c5469e04 S |
1 | from __future__ import unicode_literals |
2 | ||
b4444d5c JMF |
3 | import re |
4 | import json | |
082b1155 | 5 | import itertools |
b4444d5c JMF |
6 | |
7 | from .common import InfoExtractor | |
1cc79574 | 8 | from ..compat import ( |
cbf915f3 | 9 | compat_str, |
b00ca882 JMF |
10 | compat_urllib_parse_urlparse, |
11 | compat_urlparse, | |
1cc79574 PH |
12 | ) |
13 | from ..utils import ( | |
cbf915f3 PH |
14 | ExtractorError, |
15 | find_xpath_attr, | |
16 | int_or_none, | |
78338f71 | 17 | orderedSet, |
cbf915f3 | 18 | xpath_with_ns, |
b00ca882 | 19 | ) |
b4444d5c JMF |
20 | |
21 | ||
class LivestreamIE(InfoExtractor):
    """Extractor for new.livestream.com video and event pages.

    A URL containing a ``/videos/<id>`` component is resolved to that single
    video.  Any other matching URL is handled as an event page and returned
    as a playlist built from the event's JSON feed.
    """
    IE_NAME = 'livestream'
    _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
    _TESTS = [{
        'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
        'md5': '53274c76ba7754fb0e8d072716f2292b',
        'info_dict': {
            'id': '4719370',
            'ext': 'mp4',
            'title': 'Live from Webster Hall NYC',
            'upload_date': '20121012',
            'like_count': int,
            'view_count': int,
            # Raw string: same value as before, without the invalid '\.' escape.
            'thumbnail': r're:^http://.*\.jpg$'
        }
    }, {
        'url': 'http://new.livestream.com/tedx/cityenglish',
        'info_dict': {
            'title': 'TEDCity2.0 (English)',
            'id': '2245590',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'http://new.livestream.com/chess24/tatasteelchess',
        'info_dict': {
            'title': 'Tata Steel Chess',
            'id': '3705884',
        },
        'playlist_mincount': 60,
    }, {
        'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
        'only_matching': True,
    }]

    def _parse_smil(self, video_id, smil_url):
        """Build the list of format dicts described by a SMIL document.

        The download is non-fatal: if it fails, an empty list is returned.
        Raises ExtractorError when no <switch> node can be located.
        """
        formats = []
        _SWITCH_XPATH = (
            './/{http://www.w3.org/2001/SMIL20/Language}body/'
            '{http://www.w3.org/2001/SMIL20/Language}switch')
        smil_doc = self._download_xml(
            smil_url, video_id,
            note='Downloading SMIL information',
            errnote='Unable to download SMIL information',
            fatal=False)
        if smil_doc is False:  # Download failed
            return formats
        title_node = find_xpath_attr(
            smil_doc, './/{http://www.w3.org/2001/SMIL20/Language}meta',
            'name', 'title')
        if title_node is None:
            # Without a title meta node, fall back to the first <switch>.
            self.report_warning('Cannot find SMIL id')
            switch_node = smil_doc.find(_SWITCH_XPATH)
        else:
            # The title meta content names the <switch> node to use.
            title_id = title_node.attrib['content']
            switch_node = find_xpath_attr(
                smil_doc, _SWITCH_XPATH, 'id', title_id)
        if switch_node is None:
            raise ExtractorError('Cannot find switch node')
        video_nodes = switch_node.findall(
            '{http://www.w3.org/2001/SMIL20/Language}video')

        for vn in video_nodes:
            tbr = int_or_none(vn.attrib.get('system-bitrate'))
            furl = (
                'http://livestream-f.akamaihd.net/%s?v=3.0.3&fp=WIN%%2014,0,0,145' %
                (vn.attrib['src']))
            if 'clipBegin' in vn.attrib:
                furl += '&ssek=' + vn.attrib['clipBegin']
            formats.append({
                'url': furl,
                # tbr is None when the node carries no usable
                # 'system-bitrate'; '%d' would raise TypeError on None.
                'format_id': 'smil_%d' % tbr if tbr is not None else 'smil',
                'ext': 'flv',
                'tbr': tbr,
                'preference': -1000,
            })
        return formats

    def _extract_video_info(self, video_data):
        """Turn one video entry of the Livestream JSON into an info dict.

        ``video_data`` must provide 'id', 'caption' and 'updated_at'; the
        progressive URLs, 'smil_url', 'thumbnail_url', 'likes' and 'views'
        are optional.
        """
        video_id = compat_str(video_data['id'])

        FORMAT_KEYS = (
            ('sd', 'progressive_url'),
            ('hd', 'progressive_url_hd'),
        )
        # 'quality' follows the position in FORMAT_KEYS, so 'hd' ranks above
        # 'sd'; entries whose URL is missing or empty are skipped.
        formats = [{
            'format_id': format_id,
            'url': video_data[key],
            'quality': i + 1,
        } for i, (format_id, key) in enumerate(FORMAT_KEYS)
            if video_data.get(key)]

        smil_url = video_data.get('smil_url')
        if smil_url:
            formats.extend(self._parse_smil(video_id, smil_url))
        self._sort_formats(formats)

        # 'likes' may be present but null; treat that like a missing key.
        likes = video_data.get('likes') or {}

        return {
            'id': video_id,
            'formats': formats,
            'title': video_data['caption'],
            'thumbnail': video_data.get('thumbnail_url'),
            # Drop the dashes and keep the first eight characters.
            'upload_date': video_data['updated_at'].replace('-', '')[:8],
            'like_count': likes.get('total'),
            'view_count': video_data.get('views'),
        }

    def _extract_event(self, info):
        """Return a playlist result covering every video of an event.

        ``info`` is the event dict; its 'id', 'owner_account_id' and
        'full_name' keys are read.  Entries are produced lazily, one feed
        page at a time.
        """
        event_id = compat_str(info['id'])
        account = compat_str(info['owner_account_id'])
        root_url = (
            'https://new.livestream.com/api/accounts/{account}/events/{event}/'
            'feed.json'.format(account=account, event=event_id))

        def _extract_videos():
            last_video = None
            for i in itertools.count(1):
                if last_video is None:
                    info_url = root_url
                else:
                    # Later pages are requested relative to the last video
                    # already seen.
                    info_url = '{root}?&id={id}&newer=-1&type=video'.format(
                        root=root_url, id=last_video)
                videos_info = self._download_json(
                    info_url, event_id,
                    'Downloading page {0}'.format(i))['data']
                videos_info = [
                    v['data'] for v in videos_info if v['type'] == 'video']
                if not videos_info:
                    # A page without videos ends the pagination.
                    break
                for v in videos_info:
                    yield self._extract_video_info(v)
                last_video = videos_info[-1]['id']
        return self.playlist_result(
            _extract_videos(), event_id, info['full_name'])

    def _real_extract(self, url):
        """Extract a single video or, for event URLs, a playlist.

        Raises ExtractorError when a video id is requested but not present
        in the event feed embedded in the page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        event_name = mobj.group('event_name')
        webpage = self._download_webpage(url, video_id or event_name)

        og_video = self._og_search_video_url(
            webpage, 'player url', fatal=False, default=None)
        if og_video is not None:
            query_str = compat_urllib_parse_urlparse(og_video).query
            query = compat_urlparse.parse_qs(query_str)
            if 'play_url' in query:
                # The play URL with '.smil' removed is fetched as JSON
                # describing the video.
                api_url = query['play_url'][0].replace('.smil', '')
                info = self._download_json(
                    api_url, video_id, 'Downloading video info')
                return self._extract_video_info(info)

        config_json = self._search_regex(
            r'window.config = ({.*?});', webpage, 'window config')
        info = json.loads(config_json)['event']

        if video_id is None:
            # This is an event page:
            return self._extract_event(info)

        # Stop at the first matching feed entry so that no other entry is
        # extracted needlessly.
        for entry in info['feed']['data']:
            if (entry['type'] == 'video' and
                    compat_str(entry['data']['id']) == video_id):
                return self._extract_video_info(entry['data'])
        raise ExtractorError('Cannot find video %s' % video_id)
c66d2baa JMF |
189 | |
190 | ||
191 | # The original version of Livestream uses a different system | |
class LivestreamOriginalIE(InfoExtractor):
    """Extractor for www.livestream.com clip ("video") and folder URLs."""
    IE_NAME = 'livestream:original'
    _VALID_URL = r'''(?x)https?://www\.livestream\.com/
        (?P<user>[^/]+)/(?P<type>video|folder)
        (?:\?.*?Id=|/)(?P<id>.*?)(&|$)
        '''
    _TESTS = [{
        'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
        'info_dict': {
            'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
            'ext': 'flv',
            'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
        },
        'params': {
            # rtmp
            'skip_download': True,
        },
    }, {
        'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
        'info_dict': {
            'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
        },
        'playlist_mincount': 4,
    }]

    def _extract_video(self, user, video_id):
        """Return the info dict of one clip, built from the clipdetails feed."""
        details_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)

        feed = self._download_xml(details_url, video_id)
        channel = feed.find('channel')
        entry = channel.find('item')
        namespaces = {'media': 'http://search.yahoo.com/mrss'}
        thumbnail_tag = xpath_with_ns('media:thumbnail', namespaces)
        thumbnail_url = entry.find(thumbnail_tag).attrib['url']
        # The play path reuses the thumbnail path with its trailing
        # number and extension (like 1.jpg) removed
        media_path = self._search_regex(
            r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path')

        result = {
            'id': video_id,
            'title': entry.find('title').text,
            'url': 'rtmp://extondemand.livestream.com/ondemand',
            'play_path': 'trans/dv15/mogulus-{0}'.format(media_path),
            'player_url': 'http://static.livestream.com/chromelessPlayer/v21/playerapi.swf?hash=5uetk&v=0803&classid=D27CDB6E-AE6D-11cf-96B8-444553540000&jsEnabled=false&wmode=opaque',
            'ext': 'flv',
            'thumbnail': thumbnail_url,
        }
        return result

    def _extract_folder(self, url, folder_id):
        """Return a playlist of the links found on a folder page."""
        page = self._download_webpage(url, folder_id)
        links = orderedSet(re.findall(
            r'''(?x)(?:
                <li\s+class="folder">\s*<a\s+href="|
                <a\s+href="(?=https?://livestre\.am/)
            )([^"]+)"''', page))

        # Each link is resolved against the folder URL.
        entries = []
        for link in links:
            entries.append({
                '_type': 'url',
                'url': compat_urlparse.urljoin(url, link),
            })

        return {
            '_type': 'playlist',
            'id': folder_id,
            'entries': entries,
        }

    def _real_extract(self, url):
        """Dispatch to folder or clip extraction based on the URL type."""
        match = re.match(self._VALID_URL, url)
        item_id = match.group('id')
        user = match.group('user')
        if match.group('type') == 'folder':
            return self._extract_folder(url, item_id)
        return self._extract_video(user, item_id)
263 | ||
264 | ||
265 | # The server doesn't support HEAD request, the generic extractor can't detect | |
266 | # the redirection | |
class LivestreamShortenerIE(InfoExtractor):
    """Resolve livestre.am short links through the page's Open Graph URL."""
    IE_NAME = 'livestream:shortener'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://livestre\.am/(?P<id>.+)'

    def _real_extract(self, url):
        """Return a 'url' result pointing at the short link's target."""
        short_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, short_id)
        target_url = self._og_search_url(page)

        return {'_type': 'url', 'url': target_url}