]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/vevo.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / extractor / vevo.py
CommitLineData
55992530 1import json
e897bd82 2import re
70d1924f
JMF
3
4from .common import InfoExtractor
3d2623a8 5from ..networking.exceptions import HTTPError
1cc79574 6from ..utils import (
70d1924f 7 ExtractorError,
7d3d06a1 8 int_or_none,
9165d6ba 9 parse_iso8601,
4dfbf869 10 parse_qs,
70d1924f
JMF
11)
12
88bd97e3 13
class VevoBaseIE(InfoExtractor):
    """Base class holding helpers shared by the Vevo extractors."""

    def _extract_json(self, webpage, video_id):
        """Parse the ``window.__INITIAL_STORE__`` object embedded in *webpage*.

        The object literal is located with a regex (up to the ``;`` that
        precedes the closing ``</script>`` tag) and handed to
        ``_parse_json``; *video_id* is forwarded to that parser.
        """
        initial_store = self._search_regex(
            r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
            webpage, 'initial store')
        return self._parse_json(initial_store, video_id)
9618c448
S
21
22
class VevoIE(VevoBaseIE):
    """
    Accepts urls from vevo.com or in the format 'vevo:{id}'
    (currently used by MTVIE and MySpaceIE)
    """
    _VALID_URL = r'''(?x)
        (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
           https?://cache\.vevo\.com/m/html/embed\.html\?video=|
           https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
           https?://embed\.vevo\.com/.*?[?&]isrc=|
           https?://tv\.vevo\.com/watch/artist/(?:[^/]+)/|
           vevo:)
        (?P<id>[^&?#]+)'''
    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1']

    _TESTS = [{
        'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        'md5': '95ee28ee45e70130e3ab02b0f579ae23',
        'info_dict': {
            'id': 'GB1101300280',
            'ext': 'mp4',
            'title': 'Hurts - Somebody to Die For',
            'timestamp': 1372057200,
            'upload_date': '20130624',
            'uploader': 'Hurts',
            'track': 'Somebody to Die For',
            'artist': 'Hurts',
            'genre': 'Pop',
        },
        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    }, {
        'note': 'v3 SMIL format',
        'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
        'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
        'info_dict': {
            'id': 'USUV71302923',
            'ext': 'mp4',
            'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
            'timestamp': 1392796919,
            'upload_date': '20140219',
            'uploader': 'Cassadee Pope',
            'track': 'I Wish I Could Break Your Heart',
            'artist': 'Cassadee Pope',
            'genre': 'Country',
        },
        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    }, {
        'note': 'Age-limited video',
        'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
        'info_dict': {
            'id': 'USRV81300282',
            'ext': 'mp4',
            'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
            'age_limit': 18,
            'timestamp': 1372888800,
            'upload_date': '20130703',
            'uploader': 'Justin Timberlake',
            'track': 'Tunnel Vision (Explicit)',
            'artist': 'Justin Timberlake',
            'genre': 'Pop',
        },
        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    }, {
        'note': 'No video_info',
        'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
        'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
        'info_dict': {
            'id': 'USUV71503000',
            'ext': 'mp4',
            'title': 'K Camp ft. T.I. - Till I Die',
            'age_limit': 18,
            'timestamp': 1449468000,
            'upload_date': '20151207',
            'uploader': 'K Camp',
            'track': 'Till I Die',
            'artist': 'K Camp',
            'genre': 'Hip-Hop',
        },
        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    }, {
        'note': 'Featured test',
        'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190',
        'md5': 'd28675e5e8805035d949dc5cf161071d',
        'info_dict': {
            'id': 'USUV71402190',
            'ext': 'mp4',
            'title': 'Lemaitre ft. LoLo - Wait',
            'age_limit': 0,
            'timestamp': 1413432000,
            'upload_date': '20141016',
            'uploader': 'Lemaitre',
            'track': 'Wait',
            'artist': 'Lemaitre',
            'genre': 'Electronic',
        },
        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    }, {
        'note': 'Only available via webpage',
        'url': 'http://www.vevo.com/watch/GBUV71600656',
        'md5': '67e79210613865b66a47c33baa5e37fe',
        'info_dict': {
            'id': 'GBUV71600656',
            'ext': 'mp4',
            'title': 'ABC - Viva Love',
            'age_limit': 0,
            'timestamp': 1461830400,
            'upload_date': '20160428',
            'uploader': 'ABC',
            'track': 'Viva Love',
            'artist': 'ABC',
            'genre': 'Pop',
        },
        'expected_warnings': ['Failed to download video versions info'],
    }, {
        # no genres available
        'url': 'http://www.vevo.com/watch/INS171400764',
        'only_matching': True,
    }, {
        # Another case available only via the webpage; using streams/streamsV3 formats
        # Geo-restricted to Netherlands/Germany
        'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
        'only_matching': True,
    }, {
        'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=',
        'only_matching': True,
    }, {
        'url': 'https://tv.vevo.com/watch/artist/janet-jackson/US0450100550',
        'only_matching': True,
    }]
    # Maps the numeric 'version' field of a stream entry to the label used
    # in format ids and progress notes; unknown values fall back to 'generic'.
    _VERSIONS = {
        0: 'youtube',  # only in AuthenticateVideo videoVersions
        1: 'level3',
        2: 'akamai',
        3: 'level3',
        4: 'amazon',
    }

    def _initialize_api(self, video_id):
        """Fetch an anonymous OAuth token and build the API URL template.

        Stores ``self._api_url_template``, a ``%``-format string taking the
        API path, which ``_call_api`` relies on.

        Raises a geo-restriction error when the token endpoint answers with
        the "unavailable in your region" page instead of JSON.
        """
        webpage = self._download_webpage(
            'https://accounts.vevo.com/token', None,
            note='Retrieving oauth token',
            errnote='Unable to retrieve oauth token',
            data=json.dumps({
                'client_id': 'SPupX1tvqFEopQ1YS6SS',
                'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous',
            }).encode(),
            headers={
                'Content-Type': 'application/json',
            })

        # The region block is served as a page, so it must be detected
        # before attempting to parse the response as JSON.
        if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage):
            self.raise_geo_restricted(
                f'{self.IE_NAME} said: This page is currently unavailable in your region')

        auth_info = self._parse_json(webpage, video_id)
        self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['legacy_token']

    def _call_api(self, path, *args, **kwargs):
        """Download JSON from the Vevo API endpoint *path*.

        Extra positional and keyword arguments are forwarded to
        ``_download_json``. ``_initialize_api`` must have been called first.

        Raises ExtractorError (``expected=True``) carrying the API's own
        messages when an HTTP error response contains them; otherwise the
        original error is re-raised unchanged.
        """
        try:
            return self._download_json(self._api_url_template % path, *args, **kwargs)
        except ExtractorError as e:
            if not isinstance(e.cause, HTTPError):
                raise
            # The error body is not guaranteed to be JSON of the expected
            # shape; parse leniently so a malformed body cannot mask the
            # real HTTP error with a parse error or KeyError.
            error_info = self._parse_json(
                e.cause.response.read().decode('utf-8', 'replace'), None, fatal=False)
            errors = error_info.get('errors') if isinstance(error_info, dict) else None
            messages = [
                error['message'] for error in errors or []
                if isinstance(error, dict) and error.get('message')]
            if not messages:
                raise
            error_message = ', '.join(messages)
            raise ExtractorError(f'{self.IE_NAME} said: {error_message}', expected=True)

    def _extract_formats(self, video_versions, video_id):
        """Build the list of format dicts from the stream entries in *video_versions*."""
        formats = []
        for video_version in video_versions or []:
            version = self._VERSIONS.get(video_version.get('version'), 'generic')
            version_url = video_version.get('url')
            if not version_url:
                continue

            if '.ism' in version_url:
                # URLs containing '.ism' are deliberately ignored
                # (presumably Smooth Streaming manifests -- TODO confirm).
                continue
            elif '.mpd' in version_url:
                formats.extend(self._extract_mpd_formats(
                    version_url, video_id, mpd_id=f'dash-{version}',
                    note=f'Downloading {version} MPD information',
                    errnote=f'Failed to download {version} MPD information',
                    fatal=False))
            elif '.m3u8' in version_url:
                formats.extend(self._extract_m3u8_formats(
                    version_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=f'hls-{version}',
                    note=f'Downloading {version} m3u8 information',
                    errnote=f'Failed to download {version} m3u8 information',
                    fatal=False))
            else:
                # Direct HTTP downloads encode their properties in the file
                # name: _<quality>_<W>x<H>_<vcodec>_<vbr>_<acodec>_<abr>.<ext>
                m = re.search(r'''(?xi)
                    _(?P<quality>[a-z0-9]+)
                    _(?P<width>[0-9]+)x(?P<height>[0-9]+)
                    _(?P<vcodec>[a-z0-9]+)
                    _(?P<vbr>[0-9]+)
                    _(?P<acodec>[a-z0-9]+)
                    _(?P<abr>[0-9]+)
                    \.(?P<ext>[a-z0-9]+)''', version_url)
                if not m:
                    continue

                formats.append({
                    'url': version_url,
                    'format_id': f'http-{version}-{video_version.get("quality") or m.group("quality")}',
                    'vcodec': m.group('vcodec'),
                    'acodec': m.group('acodec'),
                    'vbr': int(m.group('vbr')),
                    'abr': int(m.group('abr')),
                    'ext': m.group('ext'),
                    'width': int(m.group('width')),
                    'height': int(m.group('height')),
                })
        return formats

    def _real_extract(self, url):
        """Extract metadata and formats for the video referenced by *url*."""
        video_id = self._match_id(url)

        self._initialize_api(video_id)

        video_info = self._call_api(
            f'video/{video_id}', video_id, 'Downloading api video info',
            'Failed to download video info')

        video_versions = self._call_api(
            f'video/{video_id}/streams', video_id,
            'Downloading video versions info',
            'Failed to download video versions info',
            fatal=False)

        # Some videos are only available via webpage (e.g.
        # https://github.com/ytdl-org/youtube-dl/issues/9366)
        if not video_versions:
            webpage = self._download_webpage(url, video_id)
            json_data = self._extract_json(webpage, video_id)
            if 'streams' in json_data.get('default', {}):
                video_versions = json_data['default']['streams'][video_id][0]
            else:
                video_versions = [
                    value
                    for key, value in json_data['apollo']['data'].items()
                    if key.startswith(f'{video_id}.streams')]

        # 'artists' may be absent or null; treat both as "no artists"
        # instead of failing on iteration.
        uploader = None
        featured_artist = None
        for curr_artist in video_info.get('artists') or []:
            if curr_artist.get('role') == 'Featured':
                featured_artist = curr_artist['name']
            else:
                uploader = curr_artist['name']

        formats = self._extract_formats(video_versions, video_id)

        track = video_info['title']
        # The featured artist appears only in the title; the 'artist' and
        # 'uploader' fields keep the plain main artist name. Without a main
        # artist the title is just the track (avoids a literal "None ft. X").
        credited_artist = uploader
        if uploader and featured_artist:
            credited_artist = f'{uploader} ft. {featured_artist}'
        title = f'{credited_artist} - {track}' if credited_artist else track

        genres = video_info.get('genres')
        genre = (
            genres[0] if genres and isinstance(genres, list)
            and isinstance(genres[0], str) else None)

        # Only the exact booleans map to an age limit; any other value
        # (including a missing key) leaves it unknown.
        is_explicit = video_info.get('isExplicit')
        if is_explicit is True:
            age_limit = 18
        elif is_explicit is False:
            age_limit = 0
        else:
            age_limit = None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'),
            'timestamp': parse_iso8601(video_info.get('releaseDate')),
            'uploader': uploader,
            'duration': int_or_none(video_info.get('duration')),
            # 'views' can be present but null, so guard before the nested lookup.
            'view_count': int_or_none((video_info.get('views') or {}).get('total')),
            'age_limit': age_limit,
            'track': track,
            'artist': uploader,
            'genre': genre,
        }
e0da32df
S
307
308
class VevoPlaylistIE(VevoBaseIE):
    """Extractor for Vevo playlist and genre pages."""

    _VALID_URL = r'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'http://www.vevo.com/watch/genre/rock',
        'info_dict': {
            'id': 'rock',
            'title': 'Rock',
        },
        'playlist_count': 20,
    }, {
        'url': 'http://www.vevo.com/watch/genre/rock?index=0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result, or a single video when ``index`` resolves to one."""
        playlist_kind, playlist_id = self._match_valid_url(url).group('kind', 'id')

        webpage = self._download_webpage(url, playlist_id)

        # A non-empty ?index= query value asks for one entry; the page then
        # advertises that entry through a vevo://video/<id> meta tag.
        index = parse_qs(url).get('index', [None])[0]
        if index:
            video_id = self._search_regex(
                r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>',
                webpage, 'video id', default=None, group='id')
            if video_id:
                return self.url_result(f'vevo:{video_id}', VevoIE.ie_key())

        initial_store = self._extract_json(webpage, playlist_id)
        playlists = initial_store['default'][f'{playlist_kind}s']

        # Playlists are taken as the first stored entry, whereas genres
        # are looked up by the id taken from the URL.
        if playlist_kind == 'playlist':
            playlist = next(iter(playlists.values()))
        else:
            playlist = playlists[playlist_id]

        entries = []
        for src in playlist['isrcs']:
            entries.append(self.url_result(f'vevo:{src}', VevoIE.ie_key()))

        resolved_id = playlist.get('playlistId') or playlist_id
        return self.playlist_result(
            entries, resolved_id,
            playlist.get('name'), playlist.get('description'))