]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/vevo.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / vevo.py
1 import json
2 import re
3
4 from .common import InfoExtractor
5 from ..networking.exceptions import HTTPError
6 from ..utils import (
7 ExtractorError,
8 int_or_none,
9 parse_iso8601,
10 parse_qs,
11 )
12
13
class VevoBaseIE(InfoExtractor):
    # Shared base for the Vevo extractors: provides access to the JSON store
    # that vevo.com pages embed in a <script> tag.

    def _extract_json(self, webpage, video_id):
        """Return the parsed ``window.__INITIAL_STORE__`` object found in *webpage*.

        The store is located with a regex and then decoded as JSON;
        *video_id* is passed through to the JSON parser.
        """
        initial_store = self._search_regex(
            r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
            webpage, 'initial store')
        return self._parse_json(initial_store, video_id)
21
22
class VevoIE(VevoBaseIE):
    """
    Accepts urls from vevo.com or in the format 'vevo:{id}'
    (currently used by MTVIE and MySpaceIE)
    """
    _VALID_URL = r'''(?x)
        (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
           https?://cache\.vevo\.com/m/html/embed\.html\?video=|
           https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
           https?://embed\.vevo\.com/.*?[?&]isrc=|
           https?://tv\.vevo\.com/watch/artist/(?:[^/]+)/|
           vevo:)
        (?P<id>[^&?#]+)'''
    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1']

    _TESTS = [{
        'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        'md5': '95ee28ee45e70130e3ab02b0f579ae23',
        'info_dict': {
            'id': 'GB1101300280',
            'ext': 'mp4',
            'title': 'Hurts - Somebody to Die For',
            'timestamp': 1372057200,
            'upload_date': '20130624',
            'uploader': 'Hurts',
            'track': 'Somebody to Die For',
            'artist': 'Hurts',
            'genre': 'Pop',
        },
        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    }, {
        'note': 'v3 SMIL format',
        'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
        'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
        'info_dict': {
            'id': 'USUV71302923',
            'ext': 'mp4',
            'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
            'timestamp': 1392796919,
            'upload_date': '20140219',
            'uploader': 'Cassadee Pope',
            'track': 'I Wish I Could Break Your Heart',
            'artist': 'Cassadee Pope',
            'genre': 'Country',
        },
        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    }, {
        'note': 'Age-limited video',
        'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
        'info_dict': {
            'id': 'USRV81300282',
            'ext': 'mp4',
            'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
            'age_limit': 18,
            'timestamp': 1372888800,
            'upload_date': '20130703',
            'uploader': 'Justin Timberlake',
            'track': 'Tunnel Vision (Explicit)',
            'artist': 'Justin Timberlake',
            'genre': 'Pop',
        },
        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    }, {
        'note': 'No video_info',
        'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
        'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
        'info_dict': {
            'id': 'USUV71503000',
            'ext': 'mp4',
            'title': 'K Camp ft. T.I. - Till I Die',
            'age_limit': 18,
            'timestamp': 1449468000,
            'upload_date': '20151207',
            'uploader': 'K Camp',
            'track': 'Till I Die',
            'artist': 'K Camp',
            'genre': 'Hip-Hop',
        },
        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    }, {
        'note': 'Featured test',
        'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190',
        'md5': 'd28675e5e8805035d949dc5cf161071d',
        'info_dict': {
            'id': 'USUV71402190',
            'ext': 'mp4',
            'title': 'Lemaitre ft. LoLo - Wait',
            'age_limit': 0,
            'timestamp': 1413432000,
            'upload_date': '20141016',
            'uploader': 'Lemaitre',
            'track': 'Wait',
            'artist': 'Lemaitre',
            'genre': 'Electronic',
        },
        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    }, {
        'note': 'Only available via webpage',
        'url': 'http://www.vevo.com/watch/GBUV71600656',
        'md5': '67e79210613865b66a47c33baa5e37fe',
        'info_dict': {
            'id': 'GBUV71600656',
            'ext': 'mp4',
            'title': 'ABC - Viva Love',
            'age_limit': 0,
            'timestamp': 1461830400,
            'upload_date': '20160428',
            'uploader': 'ABC',
            'track': 'Viva Love',
            'artist': 'ABC',
            'genre': 'Pop',
        },
        'expected_warnings': ['Failed to download video versions info'],
    }, {
        # no genres available
        'url': 'http://www.vevo.com/watch/INS171400764',
        'only_matching': True,
    }, {
        # Another case available only via the webpage; using streams/streamsV3 formats
        # Geo-restricted to Netherlands/Germany
        'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
        'only_matching': True,
    }, {
        'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=',
        'only_matching': True,
    }, {
        'url': 'https://tv.vevo.com/watch/artist/janet-jackson/US0450100550',
        'only_matching': True,
    }]
    # Maps the numeric "version" of a stream entry to the label used in format ids.
    _VERSIONS = {
        0: 'youtube',  # only in AuthenticateVideo videoVersions
        1: 'level3',
        2: 'akamai',
        3: 'level3',
        4: 'amazon',
    }

    def _initialize_api(self, video_id):
        """Fetch an anonymous OAuth token and build the API URL template.

        Stores ``self._api_url_template``, a ``%``-format string with a single
        ``%s`` placeholder for the API path and the token already appended.
        Raises a geo-restriction error when the token endpoint answers with
        the "unavailable in your region" page instead of JSON.
        """
        webpage = self._download_webpage(
            'https://accounts.vevo.com/token', None,
            note='Retrieving oauth token',
            errnote='Unable to retrieve oauth token',
            data=json.dumps({
                'client_id': 'SPupX1tvqFEopQ1YS6SS',
                'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous',
            }).encode(),
            headers={
                'Content-Type': 'application/json',
            })

        if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage):
            self.raise_geo_restricted(
                f'{self.IE_NAME} said: This page is currently unavailable in your region')

        auth_info = self._parse_json(webpage, video_id)
        self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['legacy_token']

    def _call_api(self, path, *args, **kwargs):
        """Download JSON from the API endpoint *path*.

        Extra positional and keyword arguments are forwarded unchanged to
        ``_download_json``. When the request fails with an HTTP error whose
        body carries an ``errors`` list, the messages are re-raised as an
        expected ``ExtractorError``; any other failure is re-raised as is.
        """
        try:
            return self._download_json(self._api_url_template % path, *args, **kwargs)
        except ExtractorError as e:
            if not isinstance(e.cause, HTTPError):
                raise
            # The error body is not guaranteed to be the expected JSON, so
            # parse it leniently instead of masking the original failure.
            error_body = self._parse_json(
                e.cause.response.read().decode('utf-8', 'replace'), None, fatal=False)
            errors = error_body.get('errors') if isinstance(error_body, dict) else None
            if not isinstance(errors, list):
                errors = []
            messages = [
                error['message'] for error in errors
                if isinstance(error, dict) and error.get('message')]
            if not messages:
                # Nothing useful from the server: surface the original error.
                raise
            error_message = ', '.join(messages)
            raise ExtractorError(f'{self.IE_NAME} said: {error_message}', expected=True) from e

    def _extract_version_formats(self, video_versions, video_id):
        """Build the list of format dicts from the stream entries in *video_versions*."""
        formats = []
        for video_version in video_versions or []:
            # Skip anything that is not a stream description dict.
            if not isinstance(video_version, dict):
                continue
            version = self._VERSIONS.get(video_version.get('version'), 'generic')
            version_url = video_version.get('url')
            if not version_url:
                continue

            if '.ism' in version_url:
                # Smooth Streaming manifests are deliberately skipped.
                continue
            elif '.mpd' in version_url:
                formats.extend(self._extract_mpd_formats(
                    version_url, video_id, mpd_id=f'dash-{version}',
                    note=f'Downloading {version} MPD information',
                    errnote=f'Failed to download {version} MPD information',
                    fatal=False))
            elif '.m3u8' in version_url:
                formats.extend(self._extract_m3u8_formats(
                    version_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=f'hls-{version}',
                    note=f'Downloading {version} m3u8 information',
                    errnote=f'Failed to download {version} m3u8 information',
                    fatal=False))
            else:
                # Direct HTTP file: the technical details are encoded in the
                # file name as _quality_WxH_vcodec_vbr_acodec_abr.ext
                m = re.search(r'''(?xi)
                    _(?P<quality>[a-z0-9]+)
                    _(?P<width>[0-9]+)x(?P<height>[0-9]+)
                    _(?P<vcodec>[a-z0-9]+)
                    _(?P<vbr>[0-9]+)
                    _(?P<acodec>[a-z0-9]+)
                    _(?P<abr>[0-9]+)
                    \.(?P<ext>[a-z0-9]+)''', version_url)
                if not m:
                    continue

                formats.append({
                    'url': version_url,
                    'format_id': f'http-{version}-{video_version.get("quality") or m.group("quality")}',
                    'vcodec': m.group('vcodec'),
                    'acodec': m.group('acodec'),
                    'vbr': int(m.group('vbr')),
                    'abr': int(m.group('abr')),
                    'ext': m.group('ext'),
                    'width': int(m.group('width')),
                    'height': int(m.group('height')),
                })
        return formats

    def _real_extract(self, url):
        """Extract metadata and formats for the video identified by *url*."""
        video_id = self._match_id(url)

        self._initialize_api(video_id)

        video_info = self._call_api(
            f'video/{video_id}', video_id, 'Downloading api video info',
            'Failed to download video info')

        video_versions = self._call_api(
            f'video/{video_id}/streams', video_id,
            'Downloading video versions info',
            'Failed to download video versions info',
            fatal=False)

        # Some videos are only available via webpage (e.g.
        # https://github.com/ytdl-org/youtube-dl/issues/9366)
        if not video_versions:
            # A 'vevo:{id}' pseudo-URL cannot be downloaded; use the plain
            # watch page for the id instead.
            webpage_url = (
                url if re.match(r'https?://', url)
                else f'https://www.vevo.com/watch/{video_id}')
            webpage = self._download_webpage(webpage_url, video_id)
            json_data = self._extract_json(webpage, video_id)
            # 'default' may be absent or null
            default_store = json_data.get('default') or {}
            if 'streams' in default_store:
                video_versions = default_store['streams'][video_id][0]
            else:
                video_versions = [
                    value
                    for key, value in json_data['apollo']['data'].items()
                    if key.startswith(f'{video_id}.streams')]

        uploader = None
        artist = None
        featured_artist = None
        # 'artists' may be absent or null
        for curr_artist in video_info.get('artists') or []:
            name = curr_artist.get('name')
            if not name:
                continue
            if curr_artist.get('role') == 'Featured':
                featured_artist = name
            else:
                artist = uploader = name

        formats = self._extract_version_formats(video_versions, video_id)

        track = video_info['title']
        # The featured artist only appears in the title; the 'artist' field
        # of the result stays the main artist.
        title_artist = artist
        if artist and featured_artist:
            title_artist = f'{artist} ft. {featured_artist}'
        title = f'{title_artist} - {track}' if title_artist else track

        genres = video_info.get('genres')
        genre = (
            genres[0] if genres and isinstance(genres, list)
            and isinstance(genres[0], str) else None)

        # Identity checks on purpose: only a real boolean sets an age limit.
        is_explicit = video_info.get('isExplicit')
        if is_explicit is True:
            age_limit = 18
        elif is_explicit is False:
            age_limit = 0
        else:
            age_limit = None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'),
            'timestamp': parse_iso8601(video_info.get('releaseDate')),
            'uploader': uploader,
            'duration': int_or_none(video_info.get('duration')),
            'view_count': int_or_none((video_info.get('views') or {}).get('total')),
            'age_limit': age_limit,
            'track': track,
            'artist': uploader,
            'genre': genre,
        }
307
308
class VevoPlaylistIE(VevoBaseIE):
    # Handles vevo.com "playlist" and "genre" watch pages.
    _VALID_URL = r'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'http://www.vevo.com/watch/genre/rock',
        'info_dict': {
            'id': 'rock',
            'title': 'Rock',
        },
        'playlist_count': 20,
    }, {
        'url': 'http://www.vevo.com/watch/genre/rock?index=0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result, or a single video when ``index`` is in the URL.

        When the query string has a non-empty ``index`` and the page exposes
        a ``vevo://video/<id>`` meta tag, the request is delegated to VevoIE
        for that one video. Otherwise every ISRC listed in the page's
        embedded store becomes a ``vevo:<isrc>`` entry.
        """
        playlist_kind, playlist_id = self._match_valid_url(url).group('kind', 'id')

        webpage = self._download_webpage(url, playlist_id)

        requested_index = parse_qs(url).get('index', [None])[0]
        if requested_index:
            single_video_id = self._search_regex(
                r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>',
                webpage, 'video id', default=None, group='id')
            if single_video_id:
                return self.url_result(f'vevo:{single_video_id}', VevoIE.ie_key())

        store = self._extract_json(webpage, playlist_id)
        collections = store['default'][f'{playlist_kind}s']

        # Playlists use the first stored item; genres are looked up by the id
        # taken from the URL.
        if playlist_kind == 'playlist':
            playlist = next(iter(collections.values()))
        else:
            playlist = collections[playlist_id]

        entries = []
        for isrc in playlist['isrcs']:
            entries.append(self.url_result(f'vevo:{isrc}', VevoIE.ie_key()))

        return self.playlist_result(
            entries, playlist.get('playlistId') or playlist_id,
            playlist.get('name'), playlist.get('description'))