]>
Commit | Line | Data |
---|---|---|
1 | from __future__ import unicode_literals | |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..compat import ( | |
7 | compat_etree_fromstring, | |
8 | compat_str, | |
9 | compat_urlparse, | |
10 | ) | |
11 | from ..utils import ( | |
12 | ExtractorError, | |
13 | int_or_none, | |
14 | sanitized_Request, | |
15 | parse_iso8601, | |
16 | ) | |
17 | ||
18 | ||
class VevoBaseIE(InfoExtractor):
    '''Shared helpers for the Vevo extractors defined in this module.'''

    def _extract_json(self, webpage, video_id, item):
        '''
        Return one entry of the initial store embedded in a Vevo page.

        The store is the JSON object assigned to window.__INITIAL_STORE__
        inside an inline <script> of webpage. It is parsed and the value
        found under ['default'][item] is returned. video_id is handed to
        the JSON parser.
        '''
        store_source = self._search_regex(
            r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
            webpage, 'initial store')
        initial_store = self._parse_json(store_source, video_id)
        return initial_store['default'][item]
26 | ||
27 | ||
class VevoIE(VevoBaseIE):
    '''
    Accepts urls from vevo.com or in the format 'vevo:{id}'
    (currently used by MTVIE and MySpaceIE)
    '''
    _VALID_URL = r'''(?x)
        (?:https?://www\.vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
           https?://cache\.vevo\.com/m/html/embed\.html\?video=|
           https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
           vevo:)
        (?P<id>[^&?#]+)'''

    _TESTS = [{
        'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        'md5': '95ee28ee45e70130e3ab02b0f579ae23',
        'info_dict': {
            'id': 'GB1101300280',
            'ext': 'mp4',
            'title': 'Hurts - Somebody to Die For',
            'timestamp': 1372057200,
            'upload_date': '20130624',
            'uploader': 'Hurts',
            'track': 'Somebody to Die For',
            'artist': 'Hurts',
            'genre': 'Pop',
        },
        'expected_warnings': ['Unable to download SMIL file'],
    }, {
        'note': 'v3 SMIL format',
        'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
        'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
        'info_dict': {
            'id': 'USUV71302923',
            'ext': 'mp4',
            'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
            'timestamp': 1392796919,
            'upload_date': '20140219',
            'uploader': 'Cassadee Pope',
            'track': 'I Wish I Could Break Your Heart',
            'artist': 'Cassadee Pope',
            'genre': 'Country',
        },
        'expected_warnings': ['Unable to download SMIL file'],
    }, {
        'note': 'Age-limited video',
        'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
        'info_dict': {
            'id': 'USRV81300282',
            'ext': 'mp4',
            'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
            'age_limit': 18,
            'timestamp': 1372888800,
            'upload_date': '20130703',
            'uploader': 'Justin Timberlake',
            'track': 'Tunnel Vision (Explicit)',
            'artist': 'Justin Timberlake',
            'genre': 'Pop',
        },
        'expected_warnings': ['Unable to download SMIL file'],
    }, {
        'note': 'No video_info',
        'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
        'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
        'info_dict': {
            'id': 'USUV71503000',
            'ext': 'mp4',
            'title': 'K Camp - Till I Die',
            'age_limit': 18,
            'timestamp': 1449468000,
            'upload_date': '20151207',
            'uploader': 'K Camp',
            'track': 'Till I Die',
            'artist': 'K Camp',
            'genre': 'Rap/Hip-Hop',
        },
    }, {
        'note': 'Only available via webpage',
        'url': 'http://www.vevo.com/watch/GBUV71600656',
        'md5': '67e79210613865b66a47c33baa5e37fe',
        'info_dict': {
            'id': 'GBUV71600656',
            'ext': 'mp4',
            'title': 'ABC - Viva Love',
            'age_limit': 0,
            'timestamp': 1461830400,
            'upload_date': '20160428',
            'uploader': 'ABC',
            'track': 'Viva Love',
            'artist': 'ABC',
            'genre': 'Pop',
        },
        'expected_warnings': ['Failed to download video versions info'],
    }, {
        # no genres available
        'url': 'http://www.vevo.com/watch/INS171400764',
        'only_matching': True,
    }]
    # Prefix prepended to the path taken from each SMIL <video> src.
    _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com'
    # Maps the numeric 'sourceType' of an AuthenticateVideo version entry
    # to a symbolic name.
    _SOURCE_TYPES = {
        0: 'youtube',
        1: 'brightcove',
        2: 'http',
        3: 'hls_ios',
        4: 'hls',
        5: 'smil',  # http
        7: 'f4m_cc',
        8: 'f4m_ak',
        9: 'f4m_l3',
        10: 'ism',
        13: 'smil',  # rtmp
        18: 'dash',
    }
    # Maps the numeric 'version' of a version/stream entry to a symbolic
    # name that is used in format ids and progress notes.
    _VERSIONS = {
        0: 'youtube',  # only in AuthenticateVideo videoVersions
        1: 'level3',
        2: 'akamai',
        3: 'level3',
        4: 'amazon',
    }

    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
        '''
        Build formats from the <video> elements of a Vevo SMIL document.

        Only smil is read; the remaining parameters are accepted but not
        used here. Each <video> src is expected to look like
        '<ext>:<path>' where the path encodes bitrates, dimensions and
        codecs. Elements without a src or with a src that does not match
        are skipped.

        Returns a list of format dicts whose URLs are _SMIL_BASE_URL
        followed by the path part of the src.
        '''
        formats = []
        for el in smil.findall('.//{http://www.w3.org/2001/SMIL20/Language}video'):
            # A <video> element without src cannot yield a format; skip it
            # rather than failing the whole extraction with a KeyError.
            src = el.get('src')
            if not src:
                continue
            m = re.match(r'''(?xi)
                (?P<ext>[a-z0-9]+):
                (?P<path>
                    [/a-z0-9]+     # The directory and main part of the URL
                    _(?P<tbr>[0-9]+)k
                    _(?P<width>[0-9]+)x(?P<height>[0-9]+)
                    _(?P<vcodec>[a-z0-9]+)
                    _(?P<vbr>[0-9]+)
                    _(?P<acodec>[a-z0-9]+)
                    _(?P<abr>[0-9]+)
                    \.[a-z0-9]+  # File extension
                )''', src)
            if not m:
                continue

            format_url = self._SMIL_BASE_URL + m.group('path')
            formats.append({
                'url': format_url,
                'format_id': 'smil_' + m.group('tbr'),
                'vcodec': m.group('vcodec'),
                'acodec': m.group('acodec'),
                'tbr': int(m.group('tbr')),
                'vbr': int(m.group('vbr')),
                'abr': int(m.group('abr')),
                'ext': m.group('ext'),
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })
        return formats

    def _initialize_api(self, video_id):
        '''
        Fetch an access token and store the apiv2 URL template on self.

        Posts an empty body to http://www.vevo.com/auth, raises a
        geo-restriction error when the response carries the "unavailable
        in your region" banner, and otherwise sets self._api_url_template
        to a '%s'-style template that already contains the token.
        '''
        req = sanitized_Request(
            'http://www.vevo.com/auth', data=b'')
        webpage = self._download_webpage(
            req, None,
            note='Retrieving oauth token',
            errnote='Unable to retrieve oauth token')

        if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
            self.raise_geo_restricted(
                '%s said: This page is currently unavailable in your region' % self.IE_NAME)

        auth_info = self._parse_json(webpage, video_id)
        self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']

    def _call_api(self, path, *args, **kwargs):
        '''
        Download JSON from the apiv2 endpoint at path.

        Requires _initialize_api() to have set self._api_url_template.
        Extra arguments are forwarded to _download_json unchanged.
        '''
        return self._download_json(self._api_url_template % path, *args, **kwargs)

    def _extract_api_formats(self, video_versions, video_id):
        '''Build the format list from apiv2 stream entries.'''
        formats = []
        for video_version in video_versions:
            # 'version' and 'quality' are read with .get() so that an entry
            # lacking them still yields a format instead of a KeyError.
            version = self._VERSIONS.get(video_version.get('version'))
            version_url = video_version.get('url')
            if not version_url:
                continue

            if '.ism' in version_url:
                # Smooth Streaming manifests are skipped.
                continue
            elif '.mpd' in version_url:
                formats.extend(self._extract_mpd_formats(
                    version_url, video_id, mpd_id='dash-%s' % version,
                    note='Downloading %s MPD information' % version,
                    errnote='Failed to download %s MPD information' % version,
                    fatal=False))
            elif '.m3u8' in version_url:
                formats.extend(self._extract_m3u8_formats(
                    version_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls-%s' % version,
                    note='Downloading %s m3u8 information' % version,
                    errnote='Failed to download %s m3u8 information' % version,
                    fatal=False))
            else:
                # Direct file URL: dimensions, codecs and bitrates are
                # encoded in the file name.
                m = re.search(r'''(?xi)
                    _(?P<width>[0-9]+)x(?P<height>[0-9]+)
                    _(?P<vcodec>[a-z0-9]+)
                    _(?P<vbr>[0-9]+)
                    _(?P<acodec>[a-z0-9]+)
                    _(?P<abr>[0-9]+)
                    \.(?P<ext>[a-z0-9]+)''', version_url)
                if not m:
                    continue

                formats.append({
                    'url': version_url,
                    'format_id': 'http-%s-%s' % (version, video_version.get('quality')),
                    'vcodec': m.group('vcodec'),
                    'acodec': m.group('acodec'),
                    'vbr': int(m.group('vbr')),
                    'abr': int(m.group('abr')),
                    'ext': m.group('ext'),
                    'width': int(m.group('width')),
                    'height': int(m.group('height')),
                })
        return formats

    def _extract_legacy_formats(self, video_info, video_id):
        '''Build the format list from AuthenticateVideo 'videoVersions'.'''
        formats = []
        smil_parsed = False
        # A missing or null 'videoVersions' yields no formats instead of a
        # KeyError/TypeError here.
        for video_version in video_info.get('videoVersions') or []:
            version = self._VERSIONS.get(video_version['version'])
            if version == 'youtube':
                continue

            source_type = self._SOURCE_TYPES.get(video_version['sourceType'])
            # 'data' holds an XML document with <rendition> children.
            renditions = compat_etree_fromstring(video_version['data'])
            if source_type == 'http':
                for rend in renditions.findall('rendition'):
                    attr = rend.attrib
                    formats.append({
                        'url': attr['url'],
                        'format_id': 'http-%s-%s' % (version, attr['name']),
                        'height': int_or_none(attr.get('frameheight')),
                        'width': int_or_none(attr.get('frameWidth')),
                        'tbr': int_or_none(attr.get('totalBitrate')),
                        'vbr': int_or_none(attr.get('videoBitrate')),
                        'abr': int_or_none(attr.get('audioBitrate')),
                        'vcodec': attr.get('videoCodec'),
                        'acodec': attr.get('audioCodec'),
                    })
            elif source_type == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    renditions.find('rendition').attrib['url'], video_id,
                    'mp4', 'm3u8_native', m3u8_id='hls-%s' % version,
                    note='Downloading %s m3u8 information' % version,
                    errnote='Failed to download %s m3u8 information' % version,
                    fatal=False))
            elif source_type == 'smil' and version == 'level3' and not smil_parsed:
                # Only the first level3 SMIL entry is processed.
                formats.extend(self._extract_smil_formats(
                    renditions.find('rendition').attrib['url'], video_id, False))
                smil_parsed = True
        return formats

    def _real_extract(self, url):
        '''
        Extract metadata and formats for a single Vevo video.

        The AuthenticateVideo service is queried first. When it returns no
        'video' object, the token-based apiv2 endpoints are used instead,
        with the watch page's embedded store as a fallback for the stream
        list. If the token cannot be obtained and the first response names
        a YouTube id, the result delegates to the Youtube extractor.

        Returns an info dict, or a url_result for the YouTube fallback.
        Re-raises the ExtractorError from token retrieval when no YouTube
        id is available.
        '''
        video_id = self._match_id(url)

        json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
        response = self._download_json(
            json_url, video_id, 'Downloading video info',
            'Unable to download info', fatal=False) or {}
        video_info = response.get('video') or {}
        artist = None
        featured_artist = None
        uploader = None
        view_count = None

        if not video_info:
            try:
                self._initialize_api(video_id)
            except ExtractorError:
                # `or {}` also covers an explicit null 'errorInfo', which
                # a .get() default would not.
                ytid = (response.get('errorInfo') or {}).get('ytid')
                if ytid:
                    self.report_warning(
                        'Video is geoblocked, trying with the YouTube video %s' % ytid)
                    return self.url_result(ytid, 'Youtube', ytid)

                raise

            video_info = self._call_api(
                'video/%s' % video_id, video_id, 'Downloading api video info',
                'Failed to download video info')

            video_versions = self._call_api(
                'video/%s/streams' % video_id, video_id,
                'Downloading video versions info',
                'Failed to download video versions info',
                fatal=False)

            # Some videos are only available via webpage (e.g.
            # https://github.com/rg3/youtube-dl/issues/9366)
            if not video_versions:
                webpage = self._download_webpage(url, video_id)
                video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0]

            timestamp = parse_iso8601(video_info.get('releaseDate'))
            artists = video_info.get('artists')
            if artists:
                artist = uploader = artists[0]['name']
            # `or {}` also covers an explicit null 'views'.
            view_count = int_or_none((video_info.get('views') or {}).get('total'))

            formats = self._extract_api_formats(video_versions, video_id)
        else:
            # 'releaseDate' is expected in the '/Date(<milliseconds>)/'
            # form; the timestamp is optional, so a missing value is
            # tolerated instead of raising KeyError.
            release_date = video_info.get('releaseDate')
            timestamp = int_or_none(self._search_regex(
                r'/Date\((\d+)\)/',
                release_date, 'release date', fatal=False),
                scale=1000) if release_date else None
            artists = video_info.get('mainArtists')
            if artists:
                artist = uploader = artists[0]['artistName']

            featured_artists = video_info.get('featuredArtists')
            if featured_artists:
                featured_artist = featured_artists[0]['artistName']

            formats = self._extract_legacy_formats(video_info, video_id)
        self._sort_formats(formats)

        track = video_info['title']
        if featured_artist:
            artist = '%s ft. %s' % (artist, featured_artist)
        title = '%s - %s' % (artist, track) if artist else track

        genres = video_info.get('genres')
        genre = (
            genres[0] if genres and isinstance(genres, list) and
            isinstance(genres[0], compat_str) else None)

        # Only an explicit boolean decides the age limit; anything else
        # leaves it unknown.
        is_explicit = video_info.get('isExplicit')
        if is_explicit is True:
            age_limit = 18
        elif is_explicit is False:
            age_limit = 0
        else:
            age_limit = None

        duration = video_info.get('duration')

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'),
            'timestamp': timestamp,
            'uploader': uploader,
            'duration': duration,
            'view_count': view_count,
            'age_limit': age_limit,
            'track': track,
            # The main artist only; the 'ft.' form is used just for title.
            'artist': uploader,
            'genre': genre,
        }
374 | ||
375 | ||
class VevoPlaylistIE(VevoBaseIE):
    '''Accepts vevo.com playlist and genre watch URLs.'''
    _VALID_URL = r'https?://www\.vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29',
        'info_dict': {
            'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29',
            'title': 'Best-Of: Birdman',
        },
        'playlist_count': 10,
    }, {
        'url': 'http://www.vevo.com/watch/genre/rock',
        'info_dict': {
            'id': 'rock',
            'title': 'Rock',
        },
        'playlist_count': 20,
    }, {
        'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0',
        'md5': '32dcdfddddf9ec6917fc88ca26d36282',
        'info_dict': {
            'id': 'USCMV1100073',
            'ext': 'mp4',
            'title': 'Birdman - Y.U. MAD',
            'timestamp': 1323417600,
            'upload_date': '20111209',
            'uploader': 'Birdman',
            'track': 'Y.U. MAD',
            'artist': 'Birdman',
            'genre': 'Rap/Hip-Hop',
        },
        'expected_warnings': ['Unable to download SMIL file'],
    }, {
        'url': 'http://www.vevo.com/watch/genre/rock?index=0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        '''
        Return a playlist result, or a single video when ?index= is given.

        With a non-empty 'index' query parameter the page's
        vevo://video/<id> meta tag, if present, selects one video that is
        delegated to VevoIE. Otherwise the page's embedded store supplies
        the list of ISRCs, each of which becomes a 'vevo:<isrc>' entry.
        '''
        kind, playlist_id = re.match(self._VALID_URL, url).group('kind', 'id')

        webpage = self._download_webpage(url, playlist_id)

        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if query.get('index', [None])[0]:
            single_id = self._search_regex(
                r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>',
                webpage, 'video id', default=None, group='id')
            if single_id:
                return self.url_result('vevo:%s' % single_id, VevoIE.ie_key())

        # The store key is the plural of the URL kind: 'playlists'/'genres'.
        collections = self._extract_json(webpage, playlist_id, '%ss' % kind)

        if kind == 'playlist':
            # Playlist pages: take the first stored playlist rather than
            # looking it up by the id from the URL.
            playlist = list(collections.values())[0]
        else:
            playlist = collections[playlist_id]

        entries = []
        for isrc in playlist['isrcs']:
            entries.append(self.url_result('vevo:%s' % isrc, VevoIE.ie_key()))

        return self.playlist_result(
            entries, playlist.get('playlistId') or playlist_id,
            playlist.get('name'), playlist.get('description'))