]>
Commit | Line | Data |
---|---|---|
45d7bc2f JMF |
1 | from __future__ import unicode_literals |
2 | ||
70d1924f | 3 | import re |
70d1924f JMF |
4 | |
5 | from .common import InfoExtractor | |
e0da32df S |
6 | from ..compat import ( |
7 | compat_etree_fromstring, | |
5c9ced95 | 8 | compat_str, |
e0da32df S |
9 | compat_urlparse, |
10 | ) | |
1cc79574 | 11 | from ..utils import ( |
70d1924f | 12 | ExtractorError, |
7d3d06a1 | 13 | int_or_none, |
9165d6ba | 14 | sanitized_Request, |
15 | parse_iso8601, | |
70d1924f JMF |
16 | ) |
17 | ||
88bd97e3 | 18 | |
class VevoBaseIE(InfoExtractor):
    '''Shared helpers for the Vevo extractors defined in this module.'''

    def _extract_json(self, webpage, video_id, item):
        '''
        Return one entry of the page's embedded initial store.

        The text assigned to ``window.__INITIAL_STORE__`` in ``webpage`` is
        located with ``self._search_regex``, handed to ``self._parse_json``
        together with ``video_id``, and the value found under
        ``['default'][item]`` of the parsed result is returned.
        '''
        store_text = self._search_regex(
            r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
            webpage, 'initial store')
        store = self._parse_json(store_text, video_id)
        default_section = store['default']
        return default_section[item]
26 | ||
27 | ||
class VevoIE(VevoBaseIE):
    '''
    Accepts urls from vevo.com or in the format 'vevo:{id}'
    (currently used by MTVIE and MySpaceIE)
    '''
    _VALID_URL = r'''(?x)
        (?:https?://www\.vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
           https?://cache\.vevo\.com/m/html/embed\.html\?video=|
           https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
           vevo:)
        (?P<id>[^&?#]+)'''

    _TESTS = [{
        'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        'md5': '95ee28ee45e70130e3ab02b0f579ae23',
        'info_dict': {
            'id': 'GB1101300280',
            'ext': 'mp4',
            'title': 'Hurts - Somebody to Die For',
            'timestamp': 1372057200,
            'upload_date': '20130624',
            'uploader': 'Hurts',
            'track': 'Somebody to Die For',
            'artist': 'Hurts',
            'genre': 'Pop',
        },
        'expected_warnings': ['Unable to download SMIL file'],
    }, {
        'note': 'v3 SMIL format',
        'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
        'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
        'info_dict': {
            'id': 'USUV71302923',
            'ext': 'mp4',
            'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
            'timestamp': 1392796919,
            'upload_date': '20140219',
            'uploader': 'Cassadee Pope',
            'track': 'I Wish I Could Break Your Heart',
            'artist': 'Cassadee Pope',
            'genre': 'Country',
        },
        'expected_warnings': ['Unable to download SMIL file'],
    }, {
        'note': 'Age-limited video',
        'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
        'info_dict': {
            'id': 'USRV81300282',
            'ext': 'mp4',
            'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
            'age_limit': 18,
            'timestamp': 1372888800,
            'upload_date': '20130703',
            'uploader': 'Justin Timberlake',
            'track': 'Tunnel Vision (Explicit)',
            'artist': 'Justin Timberlake',
            'genre': 'Pop',
        },
        'expected_warnings': ['Unable to download SMIL file'],
    }, {
        'note': 'No video_info',
        'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
        'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
        'info_dict': {
            'id': 'USUV71503000',
            'ext': 'mp4',
            'title': 'K Camp - Till I Die',
            'age_limit': 18,
            'timestamp': 1449468000,
            'upload_date': '20151207',
            'uploader': 'K Camp',
            'track': 'Till I Die',
            'artist': 'K Camp',
            'genre': 'Rap/Hip-Hop',
        },
    }, {
        'note': 'Only available via webpage',
        'url': 'http://www.vevo.com/watch/GBUV71600656',
        'md5': '67e79210613865b66a47c33baa5e37fe',
        'info_dict': {
            'id': 'GBUV71600656',
            'ext': 'mp4',
            'title': 'ABC - Viva Love',
            'age_limit': 0,
            'timestamp': 1461830400,
            'upload_date': '20160428',
            'uploader': 'ABC',
            'track': 'Viva Love',
            'artist': 'ABC',
            'genre': 'Pop',
        },
        'expected_warnings': ['Failed to download video versions info'],
    }, {
        # no genres available
        'url': 'http://www.vevo.com/watch/INS171400764',
        'only_matching': True,
    }]
    # Prefix prepended to the path part of each SMIL <video> src.
    _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com'
    # Maps the numeric 'sourceType' of an AuthenticateVideo videoVersion
    # to a symbolic name.
    _SOURCE_TYPES = {
        0: 'youtube',
        1: 'brightcove',
        2: 'http',
        3: 'hls_ios',
        4: 'hls',
        5: 'smil',  # http
        7: 'f4m_cc',
        8: 'f4m_ak',
        9: 'f4m_l3',
        10: 'ism',
        13: 'smil',  # rtmp
        18: 'dash',
    }
    # Maps the numeric 'version' of a video version to a symbolic name.
    _VERSIONS = {
        0: 'youtube',  # only in AuthenticateVideo videoVersions
        1: 'level3',
        2: 'akamai',
        3: 'level3',
        4: 'amazon',
    }

    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
        '''
        Build format dicts from the <video> elements of a parsed SMIL tree.

        Only elements whose 'src' matches the
        "<ext>:<path>_<tbr>k_<w>x<h>_<vcodec>_<vbr>_<acodec>_<abr>.<ext>"
        naming scheme are kept; everything else is skipped.  The remaining
        parameters are accepted but not used by this implementation.
        '''
        formats = []
        els = smil.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
        for el in els:
            src = el.attrib['src']
            m = re.match(r'''(?xi)
                (?P<ext>[a-z0-9]+):
                (?P<path>
                    [/a-z0-9]+     # The directory and main part of the URL
                    _(?P<tbr>[0-9]+)k
                    _(?P<width>[0-9]+)x(?P<height>[0-9]+)
                    _(?P<vcodec>[a-z0-9]+)
                    _(?P<vbr>[0-9]+)
                    _(?P<acodec>[a-z0-9]+)
                    _(?P<abr>[0-9]+)
                    \.[a-z0-9]+  # File extension
                )''', src)
            if not m:
                continue

            format_url = self._SMIL_BASE_URL + m.group('path')
            formats.append({
                'url': format_url,
                'format_id': 'smil_' + m.group('tbr'),
                'vcodec': m.group('vcodec'),
                'acodec': m.group('acodec'),
                'tbr': int(m.group('tbr')),
                'vbr': int(m.group('vbr')),
                'abr': int(m.group('abr')),
                'ext': m.group('ext'),
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })
        return formats

    def _initialize_api(self, video_id):
        '''
        Fetch an access token and store the apiv2 URL template on self.

        Sets ``self._api_url_template``, which ``_call_api`` fills in with
        an API path.  Reports a geo restriction when the auth response
        contains the region-unavailable banner text.
        '''
        req = sanitized_Request(
            'http://www.vevo.com/auth', data=b'')
        webpage = self._download_webpage(
            req, None,
            note='Retrieving oauth token',
            errnote='Unable to retrieve oauth token')

        if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
            self.raise_geo_restricted(
                '%s said: This page is currently unavailable in your region' % self.IE_NAME)

        auth_info = self._parse_json(webpage, video_id)
        self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']

    def _call_api(self, path, *args, **kwargs):
        '''
        Download JSON from the apiv2 endpoint at ``path``.

        Extra arguments are forwarded to ``self._download_json``.
        ``_initialize_api`` must have been called first, since it is what
        sets ``self._api_url_template``.
        '''
        return self._download_json(self._api_url_template % path, *args, **kwargs)

    def _real_extract(self, url):
        '''
        Extract metadata and formats for a single Vevo video.

        The AuthenticateVideo endpoint is tried first.  When it yields no
        'video' entry, the apiv2 endpoints are used instead, with the watch
        page's embedded store as a last resort for the stream list.

        Returns an info dict, or a url_result pointing at YouTube when the
        response carries a 'ytid' in its error info.  Raises ExtractorError
        when the first endpoint reports an error and offers no such id.
        '''
        video_id = self._match_id(url)

        json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
        response = self._download_json(
            json_url, video_id, 'Downloading video info',
            'Unable to download info', fatal=False) or {}
        video_info = response.get('video') or {}
        artist = None
        featured_artist = None
        uploader = None
        view_count = None
        formats = []

        if not video_info:
            # NOTE(review): statusCode 909 is treated as "fall through to
            # apiv2" - its exact meaning is not visible here; confirm.
            if response and response.get('statusCode') != 909:
                # 'errorInfo' may be present but null, so guard with `or {}`
                # instead of relying on the .get() default.
                ytid = (response.get('errorInfo') or {}).get('ytid')
                if ytid:
                    self.report_warning(
                        'Video is geoblocked, trying with the YouTube video %s' % ytid)
                    return self.url_result(ytid, 'Youtube', ytid)

                if 'statusMessage' in response:
                    raise ExtractorError('%s said: %s' % (
                        self.IE_NAME, response['statusMessage']), expected=True)
                raise ExtractorError('Unable to extract videos')

            self._initialize_api(video_id)
            video_info = self._call_api(
                'video/%s' % video_id, video_id, 'Downloading api video info',
                'Failed to download video info')

            video_versions = self._call_api(
                'video/%s/streams' % video_id, video_id,
                'Downloading video versions info',
                'Failed to download video versions info',
                fatal=False)

            # Some videos are only available via webpage (e.g.
            # https://github.com/rg3/youtube-dl/issues/9366)
            if not video_versions:
                # The input may be 'vevo:<id>' or an embed URL, neither of
                # which is a watch page carrying the initial store; use the
                # canonical watch page for those.
                if re.match(r'https?://www\.vevo\.com/watch/', url):
                    webpage_url = url
                else:
                    webpage_url = 'http://www.vevo.com/watch/%s' % video_id
                webpage = self._download_webpage(webpage_url, video_id)
                video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0]

            timestamp = parse_iso8601(video_info.get('releaseDate'))
            artists = video_info.get('artists')
            if artists:
                artist = uploader = artists[0]['name']
            # 'views' may be present but null; guard before the nested lookup.
            view_count = int_or_none((video_info.get('views') or {}).get('total'))

            for video_version in video_versions:
                version = self._VERSIONS.get(video_version['version'])
                version_url = video_version.get('url')
                if not version_url:
                    continue

                if '.ism' in version_url:
                    continue
                elif '.mpd' in version_url:
                    formats.extend(self._extract_mpd_formats(
                        version_url, video_id, mpd_id='dash-%s' % version,
                        note='Downloading %s MPD information' % version,
                        errnote='Failed to download %s MPD information' % version,
                        fatal=False))
                elif '.m3u8' in version_url:
                    formats.extend(self._extract_m3u8_formats(
                        version_url, video_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls-%s' % version,
                        note='Downloading %s m3u8 information' % version,
                        errnote='Failed to download %s m3u8 information' % version,
                        fatal=False))
                else:
                    # Plain HTTP rendition: codec/bitrate/size are encoded
                    # in the file name.
                    m = re.search(r'''(?xi)
                        _(?P<width>[0-9]+)x(?P<height>[0-9]+)
                        _(?P<vcodec>[a-z0-9]+)
                        _(?P<vbr>[0-9]+)
                        _(?P<acodec>[a-z0-9]+)
                        _(?P<abr>[0-9]+)
                        \.(?P<ext>[a-z0-9]+)''', version_url)
                    if not m:
                        continue

                    formats.append({
                        'url': version_url,
                        'format_id': 'http-%s-%s' % (version, video_version['quality']),
                        'vcodec': m.group('vcodec'),
                        'acodec': m.group('acodec'),
                        'vbr': int(m.group('vbr')),
                        'abr': int(m.group('abr')),
                        'ext': m.group('ext'),
                        'width': int(m.group('width')),
                        'height': int(m.group('height')),
                    })
        else:
            # AuthenticateVideo response: releaseDate looks like
            # '/Date(<milliseconds>)/', hence scale=1000.
            timestamp = int_or_none(self._search_regex(
                r'/Date\((\d+)\)/',
                video_info['releaseDate'], 'release date', fatal=False),
                scale=1000)
            artists = video_info.get('mainArtists')
            if artists:
                artist = uploader = artists[0]['artistName']

            featured_artists = video_info.get('featuredArtists')
            if featured_artists:
                featured_artist = featured_artists[0]['artistName']

            # Only the first level3 SMIL version is parsed.
            smil_parsed = False
            for video_version in video_info['videoVersions']:
                version = self._VERSIONS.get(video_version['version'])
                if version == 'youtube':
                    continue
                else:
                    source_type = self._SOURCE_TYPES.get(video_version['sourceType'])
                    renditions = compat_etree_fromstring(video_version['data'])
                    if source_type == 'http':
                        for rend in renditions.findall('rendition'):
                            attr = rend.attrib
                            formats.append({
                                'url': attr['url'],
                                'format_id': 'http-%s-%s' % (version, attr['name']),
                                'height': int_or_none(attr.get('frameheight')),
                                'width': int_or_none(attr.get('frameWidth')),
                                'tbr': int_or_none(attr.get('totalBitrate')),
                                'vbr': int_or_none(attr.get('videoBitrate')),
                                'abr': int_or_none(attr.get('audioBitrate')),
                                'vcodec': attr.get('videoCodec'),
                                'acodec': attr.get('audioCodec'),
                            })
                    elif source_type == 'hls':
                        formats.extend(self._extract_m3u8_formats(
                            renditions.find('rendition').attrib['url'], video_id,
                            'mp4', 'm3u8_native', m3u8_id='hls-%s' % version,
                            note='Downloading %s m3u8 information' % version,
                            errnote='Failed to download %s m3u8 information' % version,
                            fatal=False))
                    elif source_type == 'smil' and version == 'level3' and not smil_parsed:
                        formats.extend(self._extract_smil_formats(
                            renditions.find('rendition').attrib['url'], video_id, False))
                        smil_parsed = True
        self._sort_formats(formats)

        track = video_info['title']
        if featured_artist:
            artist = '%s ft. %s' % (artist, featured_artist)
        title = '%s - %s' % (artist, track) if artist else track

        genres = video_info.get('genres')
        genre = (
            genres[0] if genres and isinstance(genres, list) and
            isinstance(genres[0], compat_str) else None)

        is_explicit = video_info.get('isExplicit')
        if is_explicit is True:
            age_limit = 18
        elif is_explicit is False:
            age_limit = 0
        else:
            age_limit = None

        duration = video_info.get('duration')

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'),
            'timestamp': timestamp,
            'uploader': uploader,
            'duration': duration,
            'view_count': view_count,
            'age_limit': age_limit,
            'track': track,
            # NOTE(review): reports the main artist only; the featured
            # artist appears in 'title' but not here - confirm intended.
            'artist': uploader,
            'genre': genre,
        }
e0da32df S |
376 | |
377 | ||
class VevoPlaylistIE(VevoBaseIE):
    '''Handles vevo.com playlist and genre watch pages.'''

    _VALID_URL = r'https?://www\.vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29',
        'info_dict': {
            'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29',
            'title': 'Best-Of: Birdman',
        },
        'playlist_count': 10,
    }, {
        'url': 'http://www.vevo.com/watch/genre/rock',
        'info_dict': {
            'id': 'rock',
            'title': 'Rock',
        },
        'playlist_count': 20,
    }, {
        'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0',
        'md5': '32dcdfddddf9ec6917fc88ca26d36282',
        'info_dict': {
            'id': 'USCMV1100073',
            'ext': 'mp4',
            'title': 'Birdman - Y.U. MAD',
            'timestamp': 1323417600,
            'upload_date': '20111209',
            'uploader': 'Birdman',
            'track': 'Y.U. MAD',
            'artist': 'Birdman',
            'genre': 'Rap/Hip-Hop',
        },
        'expected_warnings': ['Unable to download SMIL file'],
    }, {
        'url': 'http://www.vevo.com/watch/genre/rock?index=0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        '''
        Resolve a playlist/genre URL to a playlist result.

        When the URL has a non-empty 'index' query parameter and the page
        exposes a 'vevo://video/<id>' meta content value, a single video
        result for that id is returned instead of the whole playlist.
        '''
        match = re.match(self._VALID_URL, url)
        playlist_kind, playlist_id = match.group('kind', 'id')

        webpage = self._download_webpage(url, playlist_id)

        query = compat_urlparse.urlparse(url).query
        params = compat_urlparse.parse_qs(query)
        index = params.get('index', [None])[0]

        if index:
            video_id = self._search_regex(
                r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>',
                webpage, 'video id', default=None, group='id')
            if video_id:
                return self.url_result('vevo:%s' % video_id, VevoIE.ie_key())

        # The store section is named after the kind: 'playlists' or 'genres'.
        store_key = '%ss' % playlist_kind
        playlists = self._extract_json(webpage, playlist_id, store_key)

        if playlist_kind == 'playlist':
            # Playlist pages: take the first stored playlist.
            playlist = list(playlists.values())[0]
        else:
            # Genre pages: the store is keyed by the id from the URL.
            playlist = playlists[playlist_id]

        entries = []
        for isrc in playlist['isrcs']:
            entries.append(self.url_result('vevo:%s' % isrc, VevoIE.ie_key()))

        result_id = playlist.get('playlistId') or playlist_id
        return self.playlist_result(
            entries, result_id,
            playlist.get('name'), playlist.get('description'))