jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	import itertools
	2
	3	from .common import InfoExtractor
	4	from ..compat import (
	5	compat_b64decode,
	6	compat_ord,
	7	compat_str,
	8	compat_urllib_parse_unquote,
	9	)
	10	from ..utils import (
	11	ExtractorError,
	12	int_or_none,
	13	parse_iso8601,
	14	strip_or_none,
	15	try_get,
	16	)
	17
	18
	19	class MixcloudBaseIE(InfoExtractor):
	20	def _call_api(self, object_type, object_fields, display_id, username, slug=None):
	21	lookup_key = object_type + 'Lookup'
	22	return self._download_json(
	23	'https://www.mixcloud.com/graphql', display_id, query={
	24	'query': '''{
	25	%s(lookup: {username: "%s"%s}) {
	26	%s
	27	}
	28	}''' % (lookup_key, username, ', slug: "%s"' % slug if slug else '', object_fields)
	29	})['data'][lookup_key]
	30
	31
	32	class MixcloudIE(MixcloudBaseIE):
	33	_VALID_URL = r'https?://(?:(?:www\|beta\|m)\.)?mixcloud\.com/([^/]+)/(?!stream\|uploads\|favorites\|listens\|playlists)([^/]+)'
	34	IE_NAME = 'mixcloud'
	35
	36	_TESTS = [{
	37	'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
	38	'info_dict': {
	39	'id': 'dholbach_cryptkeeper',
	40	'ext': 'm4a',
	41	'title': 'Cryptkeeper',
	42	'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
	43	'uploader': 'Daniel Holbach',
	44	'uploader_id': 'dholbach',
	45	'thumbnail': r're:https?://.*\.jpg',
	46	'view_count': int,
	47	'timestamp': 1321359578,
	48	'upload_date': '20111115',
	49	},
	50	}, {
	51	'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
	52	'info_dict': {
	53	'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
	54	'ext': 'mp3',
	55	'title': 'Caribou 7 inch Vinyl Mix & Chat',
	56	'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
	57	'uploader': 'Gilles Peterson Worldwide',
	58	'uploader_id': 'gillespeterson',
	59	'thumbnail': 're:https?://.*',
	60	'view_count': int,
	61	'timestamp': 1422987057,
	62	'upload_date': '20150203',
	63	},
	64	}, {
	65	'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
	66	'only_matching': True,
	67	}]
	68	_DECRYPTION_KEY = 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'
	69
	70	@staticmethod
	71	def _decrypt_xor_cipher(key, ciphertext):
	72	"""Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
	73	return ''.join([
	74	chr(compat_ord(ch) ^ compat_ord(k))
	75	for ch, k in zip(ciphertext, itertools.cycle(key))])
	76
	77	def _real_extract(self, url):
	78	username, slug = self._match_valid_url(url).groups()
	79	username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
	80	track_id = '%s_%s' % (username, slug)
	81
	82	cloudcast = self._call_api('cloudcast', '''audioLength
	83	comments(first: 100) {
	84	edges {
	85	node {
	86	comment
	87	created
	88	user {
	89	displayName
	90	username
	91	}
	92	}
	93	}
	94	totalCount
	95	}
	96	description
	97	favorites {
	98	totalCount
	99	}
	100	featuringArtistList
	101	isExclusive
	102	name
	103	owner {
	104	displayName
	105	url
	106	username
	107	}
	108	picture(width: 1024, height: 1024) {
	109	url
	110	}
	111	plays
	112	publishDate
	113	reposts {
	114	totalCount
	115	}
	116	streamInfo {
	117	dashUrl
	118	hlsUrl
	119	url
	120	}
	121	tags {
	122	tag {
	123	name
	124	}
	125	}
	126	restrictedReason
	127	id''', track_id, username, slug)
	128
	129	if not cloudcast:
	130	raise ExtractorError('Track not found', expected=True)
	131
	132	reason = cloudcast.get('restrictedReason')
	133	if reason == 'tracklist':
	134	raise ExtractorError('Track unavailable in your country due to licensing restrictions', expected=True)
	135	elif reason == 'repeat_play':
	136	raise ExtractorError('You have reached your play limit for this track', expected=True)
	137	elif reason:
	138	raise ExtractorError('Track is restricted', expected=True)
	139
	140	title = cloudcast['name']
	141
	142	stream_info = cloudcast['streamInfo']
	143	formats = []
	144
	145	for url_key in ('url', 'hlsUrl', 'dashUrl'):
	146	format_url = stream_info.get(url_key)
	147	if not format_url:
	148	continue
	149	decrypted = self._decrypt_xor_cipher(
	150	self._DECRYPTION_KEY, compat_b64decode(format_url))
	151	if url_key == 'hlsUrl':
	152	formats.extend(self._extract_m3u8_formats(
	153	decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
	154	m3u8_id='hls', fatal=False))
	155	elif url_key == 'dashUrl':
	156	formats.extend(self._extract_mpd_formats(
	157	decrypted, track_id, mpd_id='dash', fatal=False))
	158	else:
	159	formats.append({
	160	'format_id': 'http',
	161	'url': decrypted,
	162	'downloader_options': {
	163	# Mixcloud starts throttling at >~5M
	164	'http_chunk_size': 5242880,
	165	},
	166	})
	167
	168	if not formats and cloudcast.get('isExclusive'):
	169	self.raise_login_required(metadata_available=True)
	170
	171	self._sort_formats(formats)
	172
	173	comments = []
	174	for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
	175	node = edge.get('node') or {}
	176	text = strip_or_none(node.get('comment'))
	177	if not text:
	178	continue
	179	user = node.get('user') or {}
	180	comments.append({
	181	'author': user.get('displayName'),
	182	'author_id': user.get('username'),
	183	'text': text,
	184	'timestamp': parse_iso8601(node.get('created')),
	185	})
	186
	187	tags = []
	188	for t in cloudcast.get('tags'):
	189	tag = try_get(t, lambda x: x['tag']['name'], compat_str)
	190	if not tag:
	191	tags.append(tag)
	192
	193	get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))
	194
	195	owner = cloudcast.get('owner') or {}
	196
	197	return {
	198	'id': track_id,
	199	'title': title,
	200	'formats': formats,
	201	'description': cloudcast.get('description'),
	202	'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str),
	203	'uploader': owner.get('displayName'),
	204	'timestamp': parse_iso8601(cloudcast.get('publishDate')),
	205	'uploader_id': owner.get('username'),
	206	'uploader_url': owner.get('url'),
	207	'duration': int_or_none(cloudcast.get('audioLength')),
	208	'view_count': int_or_none(cloudcast.get('plays')),
	209	'like_count': get_count('favorites'),
	210	'repost_count': get_count('reposts'),
	211	'comment_count': get_count('comments'),
	212	'comments': comments,
	213	'tags': tags,
	214	'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None,
	215	}
	216
	217
	218	class MixcloudPlaylistBaseIE(MixcloudBaseIE):
	219	def _get_cloudcast(self, node):
	220	return node
	221
	222	def _get_playlist_title(self, title, slug):
	223	return title
	224
	225	def _real_extract(self, url):
	226	username, slug = self._match_valid_url(url).groups()
	227	username = compat_urllib_parse_unquote(username)
	228	if not slug:
	229	slug = 'uploads'
	230	else:
	231	slug = compat_urllib_parse_unquote(slug)
	232	playlist_id = '%s_%s' % (username, slug)
	233
	234	is_playlist_type = self._ROOT_TYPE == 'playlist'
	235	playlist_type = 'items' if is_playlist_type else slug
	236	list_filter = ''
	237
	238	has_next_page = True
	239	entries = []
	240	while has_next_page:
	241	playlist = self._call_api(
	242	self._ROOT_TYPE, '''%s
	243	%s
	244	%s(first: 100%s) {
	245	edges {
	246	node {
	247	%s
	248	}
	249	}
	250	pageInfo {
	251	endCursor
	252	hasNextPage
	253	}
	254	}''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE),
	255	playlist_id, username, slug if is_playlist_type else None)
	256
	257	items = playlist.get(playlist_type) or {}
	258	for edge in items.get('edges', []):
	259	cloudcast = self._get_cloudcast(edge.get('node') or {})
	260	cloudcast_url = cloudcast.get('url')
	261	if not cloudcast_url:
	262	continue
	263	slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
	264	owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
	265	video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None
	266	entries.append(self.url_result(
	267	cloudcast_url, MixcloudIE.ie_key(), video_id))
	268
	269	page_info = items['pageInfo']
	270	has_next_page = page_info['hasNextPage']
	271	list_filter = ', after: "%s"' % page_info['endCursor']
	272
	273	return self.playlist_result(
	274	entries, playlist_id,
	275	self._get_playlist_title(playlist[self._TITLE_KEY], slug),
	276	playlist.get(self._DESCRIPTION_KEY))
	277
	278
	279	class MixcloudUserIE(MixcloudPlaylistBaseIE):
	280	_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/(?P<type>uploads\|favorites\|listens\|stream)?/?$'
	281	IE_NAME = 'mixcloud:user'
	282
	283	_TESTS = [{
	284	'url': 'http://www.mixcloud.com/dholbach/',
	285	'info_dict': {
	286	'id': 'dholbach_uploads',
	287	'title': 'Daniel Holbach (uploads)',
	288	'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
	289	},
	290	'playlist_mincount': 36,
	291	}, {
	292	'url': 'http://www.mixcloud.com/dholbach/uploads/',
	293	'info_dict': {
	294	'id': 'dholbach_uploads',
	295	'title': 'Daniel Holbach (uploads)',
	296	'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
	297	},
	298	'playlist_mincount': 36,
	299	}, {
	300	'url': 'http://www.mixcloud.com/dholbach/favorites/',
	301	'info_dict': {
	302	'id': 'dholbach_favorites',
	303	'title': 'Daniel Holbach (favorites)',
	304	'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
	305	},
	306	# 'params': {
	307	# 'playlist_items': '1-100',
	308	# },
	309	'playlist_mincount': 396,
	310	}, {
	311	'url': 'http://www.mixcloud.com/dholbach/listens/',
	312	'info_dict': {
	313	'id': 'dholbach_listens',
	314	'title': 'Daniel Holbach (listens)',
	315	'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
	316	},
	317	# 'params': {
	318	# 'playlist_items': '1-100',
	319	# },
	320	'playlist_mincount': 1623,
	321	'skip': 'Large list',
	322	}, {
	323	'url': 'https://www.mixcloud.com/FirstEar/stream/',
	324	'info_dict': {
	325	'id': 'FirstEar_stream',
	326	'title': 'First Ear (stream)',
	327	'description': 'Curators of good music\r\n\r\nfirstearmusic.com',
	328	},
	329	'playlist_mincount': 271,
	330	}]
	331
	332	_TITLE_KEY = 'displayName'
	333	_DESCRIPTION_KEY = 'biog'
	334	_ROOT_TYPE = 'user'
	335	_NODE_TEMPLATE = '''slug
	336	url
	337	owner { username }'''
	338
	339	def _get_playlist_title(self, title, slug):
	340	return '%s (%s)' % (title, slug)
	341
	342
	343	class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
	344	_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
	345	IE_NAME = 'mixcloud:playlist'
	346
	347	_TESTS = [{
	348	'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
	349	'info_dict': {
	350	'id': 'maxvibes_jazzcat-on-ness-radio',
	351	'title': 'Ness Radio sessions',
	352	},
	353	'playlist_mincount': 59,
	354	}]
	355	_TITLE_KEY = 'name'
	356	_DESCRIPTION_KEY = 'description'
	357	_ROOT_TYPE = 'playlist'
	358	_NODE_TEMPLATE = '''cloudcast {
	359	slug
	360	url
	361	owner { username }
	362	}'''
	363
	364	def _get_cloudcast(self, node):
	365	return node.get('cloudcast') or {}