]>
Commit | Line | Data |
---|---|---|
c5469e04 S |
1 | from __future__ import unicode_literals |
2 | ||
b4444d5c JMF |
3 | import re |
4 | import json | |
5 | ||
6 | from .common import InfoExtractor | |
b00ca882 | 7 | from ..utils import ( |
cbf915f3 | 8 | compat_str, |
b00ca882 JMF |
9 | compat_urllib_parse_urlparse, |
10 | compat_urlparse, | |
cbf915f3 PH |
11 | ExtractorError, |
12 | find_xpath_attr, | |
13 | int_or_none, | |
78338f71 | 14 | orderedSet, |
cbf915f3 | 15 | xpath_with_ns, |
b00ca882 | 16 | ) |
b4444d5c JMF |
17 | |
18 | ||
class LivestreamIE(InfoExtractor):
    """Extractor for events and videos on new.livestream.com.

    A URL containing ``/videos/<id>`` is resolved to a single video; a URL
    without it is treated as an event page and returned as a playlist of the
    videos found in the event feed.
    """
    IE_NAME = 'livestream'
    _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
    _TESTS = [{
        'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
        'md5': '53274c76ba7754fb0e8d072716f2292b',
        'info_dict': {
            'id': '4719370',
            'ext': 'mp4',
            'title': 'Live from Webster Hall NYC',
            'upload_date': '20121012',
            'like_count': int,
            'view_count': int,
            # Raw string so that '\.' is not treated as a string escape.
            'thumbnail': r're:^http://.*\.jpg$'
        }
    }, {
        'url': 'http://new.livestream.com/tedx/cityenglish',
        'info_dict': {
            'title': 'TEDCity2.0 (English)',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
        'only_matching': True,
    }]

    def _parse_smil(self, video_id, smil_url):
        """Return the list of format dicts described by a SMIL document.

        An empty list is returned when the SMIL document cannot be
        downloaded.  Raises ExtractorError when no <switch> node is found.
        """
        formats = []
        _SWITCH_XPATH = (
            './/{http://www.w3.org/2001/SMIL20/Language}body/'
            '{http://www.w3.org/2001/SMIL20/Language}switch')
        smil_doc = self._download_xml(
            smil_url, video_id,
            note='Downloading SMIL information',
            errnote='Unable to download SMIL information',
            fatal=False)
        if smil_doc is False:  # Download failed
            return formats
        title_node = find_xpath_attr(
            smil_doc, './/{http://www.w3.org/2001/SMIL20/Language}meta',
            'name', 'title')
        if title_node is None:
            # No title meta: fall back to the first <switch> in the body.
            self.report_warning('Cannot find SMIL id')
            switch_node = smil_doc.find(_SWITCH_XPATH)
        else:
            # The title meta's content names the <switch> node to use.
            title_id = title_node.attrib['content']
            switch_node = find_xpath_attr(
                smil_doc, _SWITCH_XPATH, 'id', title_id)
        if switch_node is None:
            raise ExtractorError('Cannot find switch node')
        video_nodes = switch_node.findall(
            '{http://www.w3.org/2001/SMIL20/Language}video')

        for vn in video_nodes:
            tbr = int_or_none(vn.attrib.get('system-bitrate'))
            furl = (
                'http://livestream-f.akamaihd.net/%s?v=3.0.3&fp=WIN%%2014,0,0,145' %
                (vn.attrib['src']))
            if 'clipBegin' in vn.attrib:
                furl += '&ssek=' + vn.attrib['clipBegin']
            formats.append({
                'url': furl,
                # '%d' % None raises TypeError, so a node without a
                # system-bitrate attribute gets a plain 'smil' id.
                'format_id': 'smil' if tbr is None else 'smil_%d' % tbr,
                'ext': 'flv',
                'tbr': tbr,
                'preference': -1000,
            })
        return formats

    def _extract_video_info(self, video_data):
        """Build the info dict for one video from its API data dict.

        Formats come from the progressive URLs present in video_data and,
        when a 'smil_url' is given, from the referenced SMIL document.
        """
        video_id = compat_str(video_data['id'])

        FORMAT_KEYS = (
            ('sd', 'progressive_url'),
            ('hd', 'progressive_url_hd'),
        )
        formats = [{
            'format_id': format_id,
            'url': video_data[key],
            'quality': i + 1,
        } for i, (format_id, key) in enumerate(FORMAT_KEYS)
            if video_data.get(key)]

        smil_url = video_data.get('smil_url')
        if smil_url:
            formats.extend(self._parse_smil(video_id, smil_url))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': video_data['caption'],
            'thumbnail': video_data.get('thumbnail_url'),
            # Keep the first 8 characters once dashes are removed (YYYYMMDD
            # per the test expectation '20121012').
            'upload_date': video_data['updated_at'].replace('-', '')[:8],
            # 'likes' may be missing or null; treat both as "no data".
            'like_count': (video_data.get('likes') or {}).get('total'),
            'view_count': video_data.get('views'),
        }

    def _real_extract(self, url):
        """Return a single video info dict or an event playlist for url.

        Raises ExtractorError when a specific video id was requested but is
        not present in the event feed.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        event_name = mobj.group('event_name')
        webpage = self._download_webpage(url, video_id or event_name)

        og_video = self._og_search_video_url(
            webpage, 'player url', fatal=False, default=None)
        if og_video is not None:
            query_str = compat_urllib_parse_urlparse(og_video).query
            query = compat_urlparse.parse_qs(query_str)
            if 'play_url' in query:
                # The play_url minus its '.smil' suffix is fetched as JSON
                # video info.
                api_url = query['play_url'][0].replace('.smil', '')
                info = json.loads(self._download_webpage(
                    api_url, video_id, 'Downloading video info'))
                return self._extract_video_info(info)

        config_json = self._search_regex(
            r'window\.config = ({.*?});', webpage, 'window config')
        info = json.loads(config_json)['event']

        def is_relevant(vdata, vid):
            """Tell whether a feed entry is a video (matching vid, if any)."""
            result = vdata['type'] == 'video'
            if vid is not None:
                result = result and compat_str(vdata['data']['id']) == vid
            return result

        videos = [self._extract_video_info(video_data['data'])
                  for video_data in info['feed']['data']
                  if is_relevant(video_data, video_id)]
        if video_id is None:
            # This is an event page:
            return self.playlist_result(videos, info['id'], info['full_name'])
        else:
            if not videos:
                raise ExtractorError('Cannot find video %s' % video_id)
            return videos[0]
c66d2baa JMF |
154 | |
155 | ||
# The original version of Livestream uses a different system
class LivestreamOriginalIE(InfoExtractor):
    """Extractor for clips and folders on www.livestream.com.

    'video' URLs yield a single RTMP clip; 'folder' URLs yield a playlist of
    the links found on the folder page.
    """
    IE_NAME = 'livestream:original'
    _VALID_URL = r'''(?x)https?://www\.livestream\.com/
        (?P<user>[^/]+)/(?P<type>video|folder)
        (?:\?.*?Id=|/)(?P<id>.*?)(&|$)
        '''
    _TESTS = [{
        'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
        'info_dict': {
            'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
            'ext': 'flv',
            'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
        },
        'params': {
            # rtmp
            'skip_download': True,
        },
    }, {
        'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
        'info_dict': {
            'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
        },
        'playlist_mincount': 4,
    }]

    def _extract_video(self, user, video_id):
        """Return the info dict for a single clip of the given user.

        Raises ExtractorError when the clip details response contains no
        channel/item node.
        """
        api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)

        info = self._download_xml(api_url, video_id)
        # Element.find() returns None for a missing node; fail with a clear
        # message instead of an AttributeError on NoneType.
        channel = info.find('channel')
        item = channel.find('item') if channel is not None else None
        if item is None:
            raise ExtractorError(
                'Cannot find clip details for %s' % video_id)
        ns = {'media': 'http://search.yahoo.com/mrss'}
        thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
        # Remove the extension and number from the path (like 1.jpg)
        path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path')

        return {
            'id': video_id,
            'title': item.find('title').text,
            'url': 'rtmp://extondemand.livestream.com/ondemand',
            # The RTMP play path is derived from the thumbnail location.
            'play_path': 'trans/dv15/mogulus-{0}'.format(path),
            'player_url': 'http://static.livestream.com/chromelessPlayer/v21/playerapi.swf?hash=5uetk&v=0803&classid=D27CDB6E-AE6D-11cf-96B8-444553540000&jsEnabled=false&wmode=opaque',
            'ext': 'flv',
            'thumbnail': thumbnail_url,
        }

    def _extract_folder(self, url, folder_id):
        """Return a playlist of the links found on a folder page.

        Collects, without duplicates and in page order, the hrefs of
        sub-folder entries and of links pointing at livestre.am.
        """
        webpage = self._download_webpage(url, folder_id)
        paths = orderedSet(re.findall(
            r'''(?x)(?:
                <li\s+class="folder">\s*<a\s+href="|
                <a\s+href="(?=https?://livestre\.am/)
            )([^"]+)"''', webpage))

        return {
            '_type': 'playlist',
            'id': folder_id,
            'entries': [{
                '_type': 'url',
                # hrefs may be relative; resolve them against the page URL.
                'url': compat_urlparse.urljoin(url, p),
            } for p in paths],
        }

    def _real_extract(self, url):
        """Dispatch to folder or video extraction based on the URL type."""
        mobj = re.match(self._VALID_URL, url)
        # Named item_id rather than id to avoid shadowing the builtin.
        item_id = mobj.group('id')
        user = mobj.group('user')
        url_type = mobj.group('type')
        if url_type == 'folder':
            return self._extract_folder(url, item_id)
        else:
            return self._extract_video(user, item_id)
228 | ||
229 | ||
# The server doesn't support HEAD requests, so the generic extractor can't
# detect the redirection
class LivestreamShortenerIE(InfoExtractor):
    """Resolve livestre.am short links to the URL advertised by the page."""
    IE_NAME = 'livestream:shortener'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://livestre\.am/(?P<id>.+)'

    def _real_extract(self, url):
        """Download the short-link page and defer to its Open Graph URL."""
        mobj = re.match(self._VALID_URL, url)
        # Named short_id rather than id to avoid shadowing the builtin.
        short_id = mobj.group('id')
        webpage = self._download_webpage(url, short_id)

        return {
            '_type': 'url',
            'url': self._og_search_url(webpage),
        }