]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/vevo.py
[extractors] Use new framework for existing embeds (#4307)
[yt-dlp.git] / yt_dlp / extractor / vevo.py
CommitLineData
70d1924f 1import re
55992530 2import json
70d1924f
JMF
3
4from .common import InfoExtractor
e0da32df 5from ..compat import (
5c9ced95 6 compat_str,
9bccdc70 7 compat_HTTPError,
e0da32df 8)
1cc79574 9from ..utils import (
70d1924f 10 ExtractorError,
7d3d06a1 11 int_or_none,
9165d6ba 12 parse_iso8601,
4dfbf869 13 parse_qs,
70d1924f
JMF
14)
15
88bd97e3 16
class VevoBaseIE(InfoExtractor):
    '''Common helpers shared by the Vevo extractors.'''

    def _extract_json(self, webpage, video_id):
        '''
        Return the parsed object assigned to ``window.__INITIAL_STORE__``
        in an inline script of *webpage*.

        *video_id* is only forwarded to the JSON parser.
        '''
        initial_store = self._search_regex(
            r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
            webpage, 'initial store')
        return self._parse_json(initial_store, video_id)
9618c448
S
24
25
class VevoIE(VevoBaseIE):
    '''
    Accepts urls from vevo.com or in the format 'vevo:{id}'
    (currently used by MTVIE and MySpaceIE)
    '''
    _VALID_URL = r'''(?x)
        (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
           https?://cache\.vevo\.com/m/html/embed\.html\?video=|
           https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
           https?://embed\.vevo\.com/.*?[?&]isrc=|
           https?://tv\.vevo\.com/watch/artist/(?:[^/]+)/|
           vevo:)
        (?P<id>[^&?#]+)'''
    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1']

    _TESTS = [{
        'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        'md5': '95ee28ee45e70130e3ab02b0f579ae23',
        'info_dict': {
            'id': 'GB1101300280',
            'ext': 'mp4',
            'title': 'Hurts - Somebody to Die For',
            'timestamp': 1372057200,
            'upload_date': '20130624',
            'uploader': 'Hurts',
            'track': 'Somebody to Die For',
            'artist': 'Hurts',
            'genre': 'Pop',
        },
        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    }, {
        'note': 'v3 SMIL format',
        'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
        'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
        'info_dict': {
            'id': 'USUV71302923',
            'ext': 'mp4',
            'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
            'timestamp': 1392796919,
            'upload_date': '20140219',
            'uploader': 'Cassadee Pope',
            'track': 'I Wish I Could Break Your Heart',
            'artist': 'Cassadee Pope',
            'genre': 'Country',
        },
        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    }, {
        'note': 'Age-limited video',
        'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
        'info_dict': {
            'id': 'USRV81300282',
            'ext': 'mp4',
            'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
            'age_limit': 18,
            'timestamp': 1372888800,
            'upload_date': '20130703',
            'uploader': 'Justin Timberlake',
            'track': 'Tunnel Vision (Explicit)',
            'artist': 'Justin Timberlake',
            'genre': 'Pop',
        },
        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    }, {
        'note': 'No video_info',
        'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
        'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
        'info_dict': {
            'id': 'USUV71503000',
            'ext': 'mp4',
            'title': 'K Camp ft. T.I. - Till I Die',
            'age_limit': 18,
            'timestamp': 1449468000,
            'upload_date': '20151207',
            'uploader': 'K Camp',
            'track': 'Till I Die',
            'artist': 'K Camp',
            'genre': 'Hip-Hop',
        },
        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    }, {
        'note': 'Featured test',
        'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190',
        'md5': 'd28675e5e8805035d949dc5cf161071d',
        'info_dict': {
            'id': 'USUV71402190',
            'ext': 'mp4',
            'title': 'Lemaitre ft. LoLo - Wait',
            'age_limit': 0,
            'timestamp': 1413432000,
            'upload_date': '20141016',
            'uploader': 'Lemaitre',
            'track': 'Wait',
            'artist': 'Lemaitre',
            'genre': 'Electronic',
        },
        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    }, {
        'note': 'Only available via webpage',
        'url': 'http://www.vevo.com/watch/GBUV71600656',
        'md5': '67e79210613865b66a47c33baa5e37fe',
        'info_dict': {
            'id': 'GBUV71600656',
            'ext': 'mp4',
            'title': 'ABC - Viva Love',
            'age_limit': 0,
            'timestamp': 1461830400,
            'upload_date': '20160428',
            'uploader': 'ABC',
            'track': 'Viva Love',
            'artist': 'ABC',
            'genre': 'Pop',
        },
        'expected_warnings': ['Failed to download video versions info'],
    }, {
        # no genres available
        'url': 'http://www.vevo.com/watch/INS171400764',
        'only_matching': True,
    }, {
        # Another case available only via the webpage; using streams/streamsV3 formats
        # Geo-restricted to Netherlands/Germany
        'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
        'only_matching': True,
    }, {
        'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=',
        'only_matching': True,
    }, {
        'url': 'https://tv.vevo.com/watch/artist/janet-jackson/US0450100550',
        'only_matching': True,
    }]
    # Maps the numeric 'version' field of a stream entry to the label used in
    # format ids and progress notes; unknown values fall back to 'generic'.
    _VERSIONS = {
        0: 'youtube',  # only in AuthenticateVideo videoVersions
        1: 'level3',
        2: 'akamai',
        3: 'level3',
        4: 'amazon',
    }

    def _initialize_api(self, video_id):
        '''
        Request an anonymous token from accounts.vevo.com and store the
        resulting API URL template in ``self._api_url_template``.

        The template keeps a single ``%s`` placeholder for the API path.
        Calls ``raise_geo_restricted`` when the response contains the
        "unavailable in your region" notice.
        '''
        webpage = self._download_webpage(
            'https://accounts.vevo.com/token', None,
            note='Retrieving oauth token',
            errnote='Unable to retrieve oauth token',
            data=json.dumps({
                'client_id': 'SPupX1tvqFEopQ1YS6SS',
                'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous',
            }).encode('utf-8'),
            headers={
                'Content-Type': 'application/json',
            })

        if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage):
            self.raise_geo_restricted(
                '%s said: This page is currently unavailable in your region' % self.IE_NAME)

        auth_info = self._parse_json(webpage, video_id)
        self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['legacy_token']

    def _call_api(self, path, *args, **kwargs):
        '''
        Download and return the JSON document at API *path*.

        Extra positional and keyword arguments are forwarded unchanged to
        ``_download_json``. ``_initialize_api`` must have run first, since
        this reads ``self._api_url_template``.

        When the request fails with an HTTP error whose body carries an
        ``errors`` list, an expected ExtractorError quoting the reported
        messages is raised; any other failure is re-raised untouched.
        '''
        try:
            return self._download_json(self._api_url_template % path, *args, **kwargs)
        except ExtractorError as e:
            if not isinstance(e.cause, compat_HTTPError):
                raise
            # The error body is not guaranteed to be the expected JSON
            # document, so parse it leniently: a malformed body must not
            # replace the original HTTP error with a parse error/KeyError.
            body = e.cause.read().decode('utf-8', 'replace')
            payload = self._parse_json(body, None, fatal=False)
            errors = payload.get('errors') if isinstance(payload, dict) else None
            messages = [
                error['message'] for error in errors or []
                if isinstance(error, dict) and isinstance(error.get('message'), compat_str)]
            if not messages:
                raise
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, ', '.join(messages)), expected=True)

    def _real_extract(self, url):
        '''
        Extract metadata and formats for the video referenced by *url*.

        Video metadata comes from the ``video/<id>`` API endpoint and the
        stream list from ``video/<id>/streams``; when the latter yields
        nothing, the stream list is read from the watch page's embedded
        initial store instead. Returns the info dict for the video.
        '''
        video_id = self._match_id(url)

        self._initialize_api(video_id)

        video_info = self._call_api(
            'video/%s' % video_id, video_id, 'Downloading api video info',
            'Failed to download video info')

        video_versions = self._call_api(
            'video/%s/streams' % video_id, video_id,
            'Downloading video versions info',
            'Failed to download video versions info',
            fatal=False)

        # Some videos are only available via webpage (e.g.
        # https://github.com/ytdl-org/youtube-dl/issues/9366)
        if not video_versions:
            webpage = self._download_webpage(url, video_id)
            json_data = self._extract_json(webpage, video_id)
            if 'streams' in json_data.get('default', {}):
                video_versions = json_data['default']['streams'][video_id][0]
            else:
                video_versions = [
                    value
                    for key, value in json_data['apollo']['data'].items()
                    if key.startswith('%s.streams' % video_id)]

        uploader = None
        artist = None
        featured_artist = None
        # 'artists' is optional metadata: a missing or null value must not
        # abort extraction, and entries without a name are skipped.
        for curr_artist in video_info.get('artists') or []:
            if not isinstance(curr_artist, dict):
                continue
            name = curr_artist.get('name')
            if not name:
                continue
            if curr_artist.get('role') == 'Featured':
                featured_artist = name
            else:
                artist = uploader = name

        formats = []
        for video_version in video_versions:
            version = self._VERSIONS.get(video_version.get('version'), 'generic')
            version_url = video_version.get('url')
            if not version_url:
                continue

            if '.ism' in version_url:
                # Smooth Streaming manifests are deliberately skipped
                continue
            elif '.mpd' in version_url:
                formats.extend(self._extract_mpd_formats(
                    version_url, video_id, mpd_id='dash-%s' % version,
                    note='Downloading %s MPD information' % version,
                    errnote='Failed to download %s MPD information' % version,
                    fatal=False))
            elif '.m3u8' in version_url:
                formats.extend(self._extract_m3u8_formats(
                    version_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls-%s' % version,
                    note='Downloading %s m3u8 information' % version,
                    errnote='Failed to download %s m3u8 information' % version,
                    fatal=False))
            else:
                # Direct file URLs encode their properties in the file name:
                # _<quality>_<width>x<height>_<vcodec>_<vbr>_<acodec>_<abr>.<ext>
                m = re.search(r'''(?xi)
                    _(?P<quality>[a-z0-9]+)
                    _(?P<width>[0-9]+)x(?P<height>[0-9]+)
                    _(?P<vcodec>[a-z0-9]+)
                    _(?P<vbr>[0-9]+)
                    _(?P<acodec>[a-z0-9]+)
                    _(?P<abr>[0-9]+)
                    \.(?P<ext>[a-z0-9]+)''', version_url)
                if not m:
                    continue

                formats.append({
                    'url': version_url,
                    'format_id': f'http-{version}-{video_version.get("quality") or m.group("quality")}',
                    'vcodec': m.group('vcodec'),
                    'acodec': m.group('acodec'),
                    'vbr': int(m.group('vbr')),
                    'abr': int(m.group('abr')),
                    'ext': m.group('ext'),
                    'width': int(m.group('width')),
                    'height': int(m.group('height')),
                })
        self._sort_formats(formats)

        track = video_info['title']
        if featured_artist:
            artist = '%s ft. %s' % (artist, featured_artist)
        title = '%s - %s' % (artist, track) if artist else track

        genres = video_info.get('genres')
        genre = (
            genres[0] if genres and isinstance(genres, list)
            and isinstance(genres[0], compat_str) else None)

        # Only an explicit boolean sets the age limit; anything else leaves
        # it unknown.
        is_explicit = video_info.get('isExplicit')
        if is_explicit is True:
            age_limit = 18
        elif is_explicit is False:
            age_limit = 0
        else:
            age_limit = None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'),
            'timestamp': parse_iso8601(video_info.get('releaseDate')),
            'uploader': uploader,
            'duration': int_or_none(video_info.get('duration')),
            # 'views' may be present but null, so guard before the nested lookup
            'view_count': int_or_none((video_info.get('views') or {}).get('total')),
            'age_limit': age_limit,
            'track': track,
            # The 'artist' field carries the main artist only; the featured
            # artist appears in the title.
            'artist': uploader,
            'genre': genre,
        }
e0da32df
S
311
312
class VevoPlaylistIE(VevoBaseIE):
    '''Handles vevo.com ``/watch/playlist/<id>`` and ``/watch/genre/<id>`` pages.'''
    _VALID_URL = r'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'http://www.vevo.com/watch/genre/rock',
        'info_dict': {
            'id': 'rock',
            'title': 'Rock',
        },
        'playlist_count': 20,
    }, {
        'url': 'http://www.vevo.com/watch/genre/rock?index=0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        '''
        Return a playlist result for the playlist or genre page at *url*.

        If the URL has a non-empty ``index`` query parameter and the page
        exposes a ``vevo://video/<id>`` meta tag, a single url_result for
        that video is returned instead.
        '''
        match = self._match_valid_url(url)
        playlist_id = match.group('id')
        playlist_kind = match.group('kind')

        webpage = self._download_webpage(url, playlist_id)

        index = parse_qs(url).get('index', [None])[0]
        if index:
            video_id = self._search_regex(
                r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>',
                webpage, 'video id', default=None, group='id')
            if video_id:
                return self.url_result('vevo:%s' % video_id, VevoIE.ie_key())

        initial_store = self._extract_json(webpage, playlist_id)
        # The store keys the collections by pluralised kind
        playlists = initial_store['default']['%ss' % playlist_kind]

        if playlist_kind == 'playlist':
            # Playlist pages: take the first stored playlist
            playlist = list(playlists.values())[0]
        else:
            # Genre pages: look the genre up by the id from the URL
            playlist = playlists[playlist_id]

        entries = []
        for isrc in playlist['isrcs']:
            entries.append(self.url_result('vevo:%s' % isrc, VevoIE.ie_key()))

        resolved_id = playlist.get('playlistId') or playlist_id
        return self.playlist_result(
            entries, resolved_id,
            playlist.get('name'), playlist.get('description'))