]>
Commit | Line | Data |
---|---|---|
c5469e04 S |
1 | from __future__ import unicode_literals |
2 | ||
b4444d5c JMF |
3 | import re |
4 | import json | |
5 | ||
6 | from .common import InfoExtractor | |
1cc79574 | 7 | from ..compat import ( |
cbf915f3 | 8 | compat_str, |
b00ca882 JMF |
9 | compat_urllib_parse_urlparse, |
10 | compat_urlparse, | |
1cc79574 PH |
11 | ) |
12 | from ..utils import ( | |
cbf915f3 PH |
13 | ExtractorError, |
14 | find_xpath_attr, | |
15 | int_or_none, | |
78338f71 | 16 | orderedSet, |
cbf915f3 | 17 | xpath_with_ns, |
b00ca882 | 18 | ) |
b4444d5c JMF |
19 | |
20 | ||
class LivestreamIE(InfoExtractor):
    """Extractor for new.livestream.com video and event pages.

    A URL that contains ``/videos/<id>`` is handled as a single video; any
    other matching URL is handled as an event page and returned as a
    playlist of the videos found in the page's embedded configuration.
    """
    IE_NAME = 'livestream'
    _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
    _TESTS = [{
        'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
        'md5': '53274c76ba7754fb0e8d072716f2292b',
        'info_dict': {
            'id': '4719370',
            'ext': 'mp4',
            'title': 'Live from Webster Hall NYC',
            'upload_date': '20121012',
            'like_count': int,
            'view_count': int,
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^http://.*\.jpg$'
        }
    }, {
        'url': 'http://new.livestream.com/tedx/cityenglish',
        'info_dict': {
            'title': 'TEDCity2.0 (English)',
            'id': '2245590',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
        'only_matching': True,
    }]

    def _parse_smil(self, video_id, smil_url):
        """Return the list of format dicts described by a SMIL manifest.

        video_id is only used as the display id for the download.
        An empty list is returned when the manifest cannot be downloaded
        (the download is requested with fatal=False).

        Raises ExtractorError if no matching <switch> node is found.
        """
        formats = []
        _SWITCH_XPATH = (
            './/{http://www.w3.org/2001/SMIL20/Language}body/'
            '{http://www.w3.org/2001/SMIL20/Language}switch')
        smil_doc = self._download_xml(
            smil_url, video_id,
            note='Downloading SMIL information',
            errnote='Unable to download SMIL information',
            fatal=False)
        if smil_doc is False:  # Download failed
            return formats
        title_node = find_xpath_attr(
            smil_doc, './/{http://www.w3.org/2001/SMIL20/Language}meta',
            'name', 'title')
        if title_node is None:
            # Without a title meta node, fall back to the first switch node.
            self.report_warning('Cannot find SMIL id')
            switch_node = smil_doc.find(_SWITCH_XPATH)
        else:
            title_id = title_node.attrib['content']
            switch_node = find_xpath_attr(
                smil_doc, _SWITCH_XPATH, 'id', title_id)
        if switch_node is None:
            raise ExtractorError('Cannot find switch node')
        video_nodes = switch_node.findall(
            '{http://www.w3.org/2001/SMIL20/Language}video')

        for vn in video_nodes:
            tbr = int_or_none(vn.attrib.get('system-bitrate'))
            furl = (
                'http://livestream-f.akamaihd.net/%s?v=3.0.3&fp=WIN%%2014,0,0,145' %
                (vn.attrib['src']))
            if 'clipBegin' in vn.attrib:
                furl += '&ssek=' + vn.attrib['clipBegin']
            # The bitrate attribute is optional; '%d' % None would raise
            # TypeError, so use a plain id when no bitrate is available.
            format_id = 'smil' if tbr is None else 'smil_%d' % tbr
            formats.append({
                'url': furl,
                'format_id': format_id,
                'ext': 'flv',
                'tbr': tbr,
                'preference': -1000,
            })
        return formats

    def _extract_video_info(self, video_data):
        """Build the info dict for one video from its metadata dict.

        video_data must contain 'id', 'caption' and 'updated_at'; the keys
        'progressive_url', 'progressive_url_hd', 'smil_url',
        'thumbnail_url', 'likes' and 'views' are read when present.
        """
        video_id = compat_str(video_data['id'])

        FORMAT_KEYS = (
            ('sd', 'progressive_url'),
            ('hd', 'progressive_url_hd'),
        )
        formats = [{
            'format_id': format_id,
            'url': video_data[key],
            'quality': i + 1,
        } for i, (format_id, key) in enumerate(FORMAT_KEYS)
            if video_data.get(key)]

        smil_url = video_data.get('smil_url')
        if smil_url:
            formats.extend(self._parse_smil(video_id, smil_url))
        self._sort_formats(formats)

        # 'likes' may be present but null, so do not rely on the .get()
        # default to provide a dict.
        likes = video_data.get('likes') or {}

        return {
            'id': video_id,
            'formats': formats,
            'title': video_data['caption'],
            'thumbnail': video_data.get('thumbnail_url'),
            'upload_date': video_data['updated_at'].replace('-', '')[:8],
            'like_count': likes.get('total'),
            'view_count': video_data.get('views'),
        }

    def _real_extract(self, url):
        """Extract a single video, or a playlist for an event page.

        Raises ExtractorError when a specific video id was requested but
        is not present in the event feed.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        event_name = mobj.group('event_name')
        # video_id is None for event pages; use the event name for display.
        display_id = video_id or event_name
        webpage = self._download_webpage(url, display_id)

        og_video = self._og_search_video_url(
            webpage, 'player url', fatal=False, default=None)
        if og_video is not None:
            query_str = compat_urllib_parse_urlparse(og_video).query
            query = compat_urlparse.parse_qs(query_str)
            if 'play_url' in query:
                # The play_url with its '.smil' suffix removed is fetched
                # and decoded as JSON video metadata.
                api_url = query['play_url'][0].replace('.smil', '')
                info = json.loads(self._download_webpage(
                    api_url, display_id, 'Downloading video info'))
                return self._extract_video_info(info)

        config_json = self._search_regex(
            r'window.config = ({.*?});', webpage, 'window config')
        info = json.loads(config_json)['event']

        def is_relevant(vdata, vid):
            # Keep only 'video' feed entries; when a specific id is
            # requested, additionally require the entry to match it.
            result = vdata['type'] == 'video'
            if vid is not None:
                result = result and compat_str(vdata['data']['id']) == vid
            return result

        videos = [self._extract_video_info(video_data['data'])
                  for video_data in info['feed']['data']
                  if is_relevant(video_data, video_id)]
        if video_id is None:
            # This is an event page:
            return self.playlist_result(
                videos, '%s' % info['id'], info['full_name'])
        else:
            if not videos:
                raise ExtractorError('Cannot find video %s' % video_id)
            return videos[0]
c66d2baa JMF |
158 | |
159 | ||
160 | # The original version of Livestream uses a different system | |
class LivestreamOriginalIE(InfoExtractor):
    """Extractor for www.livestream.com video and folder URLs.

    'video' URLs yield a single RTMP entry; 'folder' URLs yield a playlist
    of the links found on the folder page.
    """
    IE_NAME = 'livestream:original'
    _VALID_URL = r'''(?x)https?://www\.livestream\.com/
        (?P<user>[^/]+)/(?P<type>video|folder)
        (?:\?.*?Id=|/)(?P<id>.*?)(&|$)
        '''
    _TESTS = [{
        'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
        'info_dict': {
            'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
            'ext': 'flv',
            'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
        },
        'params': {
            # rtmp
            'skip_download': True,
        },
    }, {
        'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
        'info_dict': {
            'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
        },
        'playlist_mincount': 4,
    }]

    def _extract_video(self, user, video_id):
        """Return the info dict for a single clip of the given user.

        The RTMP play path is derived from the clip's thumbnail URL.

        Raises ExtractorError if the clip details carry no thumbnail.
        """
        api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)

        info = self._download_xml(api_url, video_id)
        item = info.find('channel').find('item')
        ns = {'media': 'http://search.yahoo.com/mrss'}
        thumbnail_node = item.find(xpath_with_ns('media:thumbnail', ns))
        if thumbnail_node is None:
            # The play path cannot be built without the thumbnail URL;
            # fail with a clear message instead of an AttributeError.
            raise ExtractorError(
                'Cannot find thumbnail for video %s' % video_id)
        thumbnail_url = thumbnail_node.attrib['url']
        # Remove the extension and number from the path (like 1.jpg)
        path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path')

        return {
            'id': video_id,
            'title': item.find('title').text,
            'url': 'rtmp://extondemand.livestream.com/ondemand',
            'play_path': 'trans/dv15/mogulus-{0}'.format(path),
            'player_url': 'http://static.livestream.com/chromelessPlayer/v21/playerapi.swf?hash=5uetk&v=0803&classid=D27CDB6E-AE6D-11cf-96B8-444553540000&jsEnabled=false&wmode=opaque',
            'ext': 'flv',
            'thumbnail': thumbnail_url,
        }

    def _extract_folder(self, url, folder_id):
        """Return a playlist of the links found on a folder page.

        Collects, without duplicates, the href of every folder list item
        and of every anchor that points at livestre.am; each is resolved
        against the page URL.
        """
        webpage = self._download_webpage(url, folder_id)
        paths = orderedSet(re.findall(
            r'''(?x)(?:
                <li\s+class="folder">\s*<a\s+href="|
                <a\s+href="(?=https?://livestre\.am/)
            )([^"]+)"''', webpage))

        return {
            '_type': 'playlist',
            'id': folder_id,
            'entries': [{
                '_type': 'url',
                'url': compat_urlparse.urljoin(url, p),
            } for p in paths],
        }

    def _real_extract(self, url):
        """Dispatch to folder or video extraction based on the URL type."""
        mobj = re.match(self._VALID_URL, url)
        # Named item_id rather than id so the builtin is not shadowed.
        item_id = mobj.group('id')
        user = mobj.group('user')
        url_type = mobj.group('type')
        if url_type == 'folder':
            return self._extract_folder(url, item_id)
        else:
            return self._extract_video(user, item_id)
232 | ||
233 | ||
234 | # The server doesn't support HEAD request, the generic extractor can't detect | |
235 | # the redirection | |
class LivestreamShortenerIE(InfoExtractor):
    """Resolve livestre.am short links.

    The short-link page is downloaded and the URL returned by
    _og_search_url for that page is handed back as a 'url' result.
    """
    IE_NAME = 'livestream:shortener'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://livestre\.am/(?P<id>.+)'

    def _real_extract(self, url):
        """Return a 'url' result pointing at the short link's target."""
        short_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, short_id)
        target_url = self._og_search_url(page)

        return {
            '_type': 'url',
            'url': target_url,
        }