jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	import json
	2	import re
	3
	4	from .common import InfoExtractor
	5	from ..networking.exceptions import HTTPError
	6	from ..utils import (
	7	ExtractorError,
	8	int_or_none,
	9	parse_iso8601,
	10	parse_qs,
	11	)
	12
	13
	14	class VevoBaseIE(InfoExtractor):
	15	def _extract_json(self, webpage, video_id):
	16	return self._parse_json(
	17	self._search_regex(
	18	r'window\.__INITIAL_STORE__\s=\s({.+?});\s*</script>',
	19	webpage, 'initial store'),
	20	video_id)
	21
	22
	23	class VevoIE(VevoBaseIE):
	24	"""
	25	Accepts urls from vevo.com or in the format 'vevo:{id}'
	26	(currently used by MTVIE and MySpaceIE)
	27	"""
	28	_VALID_URL = r'''(?x)
	29	(?:https?://(?:www\.)?vevo\.com/watch/(?!playlist\|genre)(?:[^/]+/(?:[^/]+/)?)?\|
	30	https?://cache\.vevo\.com/m/html/embed\.html\?video=\|
	31	https?://videoplayer\.vevo\.com/embed/embedded\?videoId=\|
	32	https?://embed\.vevo\.com/.*?[?&]isrc=\|
	33	https?://tv\.vevo\.com/watch/artist/(?:[^/]+)/\|
	34	vevo:)
	35	(?P<id>[^&?#]+)'''
	36	_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1']
	37
	38	_TESTS = [{
	39	'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
	40	'md5': '95ee28ee45e70130e3ab02b0f579ae23',
	41	'info_dict': {
	42	'id': 'GB1101300280',
	43	'ext': 'mp4',
	44	'title': 'Hurts - Somebody to Die For',
	45	'timestamp': 1372057200,
	46	'upload_date': '20130624',
	47	'uploader': 'Hurts',
	48	'track': 'Somebody to Die For',
	49	'artist': 'Hurts',
	50	'genre': 'Pop',
	51	},
	52	'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
	53	}, {
	54	'note': 'v3 SMIL format',
	55	'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
	56	'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
	57	'info_dict': {
	58	'id': 'USUV71302923',
	59	'ext': 'mp4',
	60	'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
	61	'timestamp': 1392796919,
	62	'upload_date': '20140219',
	63	'uploader': 'Cassadee Pope',
	64	'track': 'I Wish I Could Break Your Heart',
	65	'artist': 'Cassadee Pope',
	66	'genre': 'Country',
	67	},
	68	'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
	69	}, {
	70	'note': 'Age-limited video',
	71	'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
	72	'info_dict': {
	73	'id': 'USRV81300282',
	74	'ext': 'mp4',
	75	'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
	76	'age_limit': 18,
	77	'timestamp': 1372888800,
	78	'upload_date': '20130703',
	79	'uploader': 'Justin Timberlake',
	80	'track': 'Tunnel Vision (Explicit)',
	81	'artist': 'Justin Timberlake',
	82	'genre': 'Pop',
	83	},
	84	'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
	85	}, {
	86	'note': 'No video_info',
	87	'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
	88	'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
	89	'info_dict': {
	90	'id': 'USUV71503000',
	91	'ext': 'mp4',
	92	'title': 'K Camp ft. T.I. - Till I Die',
	93	'age_limit': 18,
	94	'timestamp': 1449468000,
	95	'upload_date': '20151207',
	96	'uploader': 'K Camp',
	97	'track': 'Till I Die',
	98	'artist': 'K Camp',
	99	'genre': 'Hip-Hop',
	100	},
	101	'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
	102	}, {
	103	'note': 'Featured test',
	104	'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190',
	105	'md5': 'd28675e5e8805035d949dc5cf161071d',
	106	'info_dict': {
	107	'id': 'USUV71402190',
	108	'ext': 'mp4',
	109	'title': 'Lemaitre ft. LoLo - Wait',
	110	'age_limit': 0,
	111	'timestamp': 1413432000,
	112	'upload_date': '20141016',
	113	'uploader': 'Lemaitre',
	114	'track': 'Wait',
	115	'artist': 'Lemaitre',
	116	'genre': 'Electronic',
	117	},
	118	'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
	119	}, {
	120	'note': 'Only available via webpage',
	121	'url': 'http://www.vevo.com/watch/GBUV71600656',
	122	'md5': '67e79210613865b66a47c33baa5e37fe',
	123	'info_dict': {
	124	'id': 'GBUV71600656',
	125	'ext': 'mp4',
	126	'title': 'ABC - Viva Love',
	127	'age_limit': 0,
	128	'timestamp': 1461830400,
	129	'upload_date': '20160428',
	130	'uploader': 'ABC',
	131	'track': 'Viva Love',
	132	'artist': 'ABC',
	133	'genre': 'Pop',
	134	},
	135	'expected_warnings': ['Failed to download video versions info'],
	136	}, {
	137	# no genres available
	138	'url': 'http://www.vevo.com/watch/INS171400764',
	139	'only_matching': True,
	140	}, {
	141	# Another case available only via the webpage; using streams/streamsV3 formats
	142	# Geo-restricted to Netherlands/Germany
	143	'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
	144	'only_matching': True,
	145	}, {
	146	'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=',
	147	'only_matching': True,
	148	}, {
	149	'url': 'https://tv.vevo.com/watch/artist/janet-jackson/US0450100550',
	150	'only_matching': True,
	151	}]
	152	_VERSIONS = {
	153	0: 'youtube', # only in AuthenticateVideo videoVersions
	154	1: 'level3',
	155	2: 'akamai',
	156	3: 'level3',
	157	4: 'amazon',
	158	}
	159
	160	def _initialize_api(self, video_id):
	161	webpage = self._download_webpage(
	162	'https://accounts.vevo.com/token', None,
	163	note='Retrieving oauth token',
	164	errnote='Unable to retrieve oauth token',
	165	data=json.dumps({
	166	'client_id': 'SPupX1tvqFEopQ1YS6SS',
	167	'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous',
	168	}).encode(),
	169	headers={
	170	'Content-Type': 'application/json',
	171	})
	172
	173	if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage):
	174	self.raise_geo_restricted(
	175	f'{self.IE_NAME} said: This page is currently unavailable in your region')
	176
	177	auth_info = self._parse_json(webpage, video_id)
	178	self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['legacy_token']
	179
	180	def _call_api(self, path, args, *kwargs):
	181	try:
	182	data = self._download_json(self._api_url_template % path, args, *kwargs)
	183	except ExtractorError as e:
	184	if isinstance(e.cause, HTTPError):
	185	errors = self._parse_json(e.cause.response.read().decode(), None)['errors']
	186	error_message = ', '.join([error['message'] for error in errors])
	187	raise ExtractorError(f'{self.IE_NAME} said: {error_message}', expected=True)
	188	raise
	189	return data
	190
	191	def _real_extract(self, url):
	192	video_id = self._match_id(url)
	193
	194	self._initialize_api(video_id)
	195
	196	video_info = self._call_api(
	197	f'video/{video_id}', video_id, 'Downloading api video info',
	198	'Failed to download video info')
	199
	200	video_versions = self._call_api(
	201	f'video/{video_id}/streams', video_id,
	202	'Downloading video versions info',
	203	'Failed to download video versions info',
	204	fatal=False)
	205
	206	# Some videos are only available via webpage (e.g.
	207	# https://github.com/ytdl-org/youtube-dl/issues/9366)
	208	if not video_versions:
	209	webpage = self._download_webpage(url, video_id)
	210	json_data = self._extract_json(webpage, video_id)
	211	if 'streams' in json_data.get('default', {}):
	212	video_versions = json_data['default']['streams'][video_id][0]
	213	else:
	214	video_versions = [
	215	value
	216	for key, value in json_data['apollo']['data'].items()
	217	if key.startswith(f'{video_id}.streams')]
	218
	219	uploader = None
	220	artist = None
	221	featured_artist = None
	222	artists = video_info.get('artists')
	223	for curr_artist in artists:
	224	if curr_artist.get('role') == 'Featured':
	225	featured_artist = curr_artist['name']
	226	else:
	227	artist = uploader = curr_artist['name']
	228
	229	formats = []
	230	for video_version in video_versions:
	231	version = self._VERSIONS.get(video_version.get('version'), 'generic')
	232	version_url = video_version.get('url')
	233	if not version_url:
	234	continue
	235
	236	if '.ism' in version_url:
	237	continue
	238	elif '.mpd' in version_url:
	239	formats.extend(self._extract_mpd_formats(
	240	version_url, video_id, mpd_id=f'dash-{version}',
	241	note=f'Downloading {version} MPD information',
	242	errnote=f'Failed to download {version} MPD information',
	243	fatal=False))
	244	elif '.m3u8' in version_url:
	245	formats.extend(self._extract_m3u8_formats(
	246	version_url, video_id, 'mp4', 'm3u8_native',
	247	m3u8_id=f'hls-{version}',
	248	note=f'Downloading {version} m3u8 information',
	249	errnote=f'Failed to download {version} m3u8 information',
	250	fatal=False))
	251	else:
	252	m = re.search(r'''(?xi)
	253	_(?P<quality>[a-z0-9]+)
	254	_(?P<width>[0-9]+)x(?P<height>[0-9]+)
	255	_(?P<vcodec>[a-z0-9]+)
	256	_(?P<vbr>[0-9]+)
	257	_(?P<acodec>[a-z0-9]+)
	258	_(?P<abr>[0-9]+)
	259	\.(?P<ext>[a-z0-9]+)''', version_url)
	260	if not m:
	261	continue
	262
	263	formats.append({
	264	'url': version_url,
	265	'format_id': f'http-{version}-{video_version.get("quality") or m.group("quality")}',
	266	'vcodec': m.group('vcodec'),
	267	'acodec': m.group('acodec'),
	268	'vbr': int(m.group('vbr')),
	269	'abr': int(m.group('abr')),
	270	'ext': m.group('ext'),
	271	'width': int(m.group('width')),
	272	'height': int(m.group('height')),
	273	})
	274
	275	track = video_info['title']
	276	if featured_artist:
	277	artist = f'{artist} ft. {featured_artist}'
	278	title = f'{artist} - {track}' if artist else track
	279
	280	genres = video_info.get('genres')
	281	genre = (
	282	genres[0] if genres and isinstance(genres, list)
	283	and isinstance(genres[0], str) else None)
	284
	285	is_explicit = video_info.get('isExplicit')
	286	if is_explicit is True:
	287	age_limit = 18
	288	elif is_explicit is False:
	289	age_limit = 0
	290	else:
	291	age_limit = None
	292
	293	return {
	294	'id': video_id,
	295	'title': title,
	296	'formats': formats,
	297	'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'),
	298	'timestamp': parse_iso8601(video_info.get('releaseDate')),
	299	'uploader': uploader,
	300	'duration': int_or_none(video_info.get('duration')),
	301	'view_count': int_or_none(video_info.get('views', {}).get('total')),
	302	'age_limit': age_limit,
	303	'track': track,
	304	'artist': uploader,
	305	'genre': genre,
	306	}
	307
	308
	309	class VevoPlaylistIE(VevoBaseIE):
	310	_VALID_URL = r'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist\|genre)/(?P<id>[^/?#&]+)'
	311
	312	_TESTS = [{
	313	'url': 'http://www.vevo.com/watch/genre/rock',
	314	'info_dict': {
	315	'id': 'rock',
	316	'title': 'Rock',
	317	},
	318	'playlist_count': 20,
	319	}, {
	320	'url': 'http://www.vevo.com/watch/genre/rock?index=0',
	321	'only_matching': True,
	322	}]
	323
	324	def _real_extract(self, url):
	325	mobj = self._match_valid_url(url)
	326	playlist_id = mobj.group('id')
	327	playlist_kind = mobj.group('kind')
	328
	329	webpage = self._download_webpage(url, playlist_id)
	330
	331	qs = parse_qs(url)
	332	index = qs.get('index', [None])[0]
	333
	334	if index:
	335	video_id = self._search_regex(
	336	r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>',
	337	webpage, 'video id', default=None, group='id')
	338	if video_id:
	339	return self.url_result(f'vevo:{video_id}', VevoIE.ie_key())
	340
	341	playlists = self._extract_json(webpage, playlist_id)['default'][f'{playlist_kind}s']
	342
	343	playlist = (next(iter(playlists.values()))
	344	if playlist_kind == 'playlist' else playlists[playlist_id])
	345
	346	entries = [
	347	self.url_result(f'vevo:{src}', VevoIE.ie_key())
	348	for src in playlist['isrcs']]
	349
	350	return self.playlist_result(
	351	entries, playlist.get('playlistId') or playlist_id,
	352	playlist.get('name'), playlist.get('description'))