jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	from __future__ import unicode_literals
	2
	3	import json
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..compat import (
	8	compat_str,
	9	compat_urlparse,
	10	)
	11	from ..utils import (
	12	ExtractorError,
	13	float_or_none,
	14	int_or_none,
	15	)
	16
	17
	18	class BandcampIE(InfoExtractor):
	19	_VALID_URL = r'https?://.?\.bandcamp\.com/track/(?P<title>.)'
	20	_TESTS = [{
	21	'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
	22	'md5': 'c557841d5e50261777a6585648adf439',
	23	'info_dict': {
	24	'id': '1812978515',
	25	'ext': 'mp3',
	26	'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
	27	'duration': 9.8485,
	28	},
	29	'_skip': 'There is a limit of 200 free downloads / month for the test song'
	30	}, {
	31	'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
	32	'md5': '2b68e5851514c20efdff2afc5603b8b4',
	33	'info_dict': {
	34	'id': '2650410135',
	35	'ext': 'mp3',
	36	'title': 'Lanius (Battle)',
	37	'uploader': 'Ben Prunty Music',
	38	},
	39	}]
	40
	41	def _real_extract(self, url):
	42	mobj = re.match(self._VALID_URL, url)
	43	title = mobj.group('title')
	44	webpage = self._download_webpage(url, title)
	45	m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
	46	if not m_download:
	47	m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
	48	if m_trackinfo:
	49	json_code = m_trackinfo.group(1)
	50	data = json.loads(json_code)[0]
	51
	52	formats = []
	53	for format_id, format_url in data['file'].items():
	54	ext, abr_str = format_id.split('-', 1)
	55	formats.append({
	56	'format_id': format_id,
	57	'url': self._proto_relative_url(format_url, 'http:'),
	58	'ext': ext,
	59	'vcodec': 'none',
	60	'acodec': ext,
	61	'abr': int_or_none(abr_str),
	62	})
	63
	64	self._sort_formats(formats)
	65
	66	return {
	67	'id': compat_str(data['id']),
	68	'title': data['title'],
	69	'formats': formats,
	70	'duration': float_or_none(data.get('duration')),
	71	}
	72	else:
	73	raise ExtractorError('No free songs found')
	74
	75	download_link = m_download.group(1)
	76	video_id = self._search_regex(
	77	r'(?ms)var TralbumData = .?[{,]\sid: (?P<id>\d+),?$',
	78	webpage, 'video id')
	79
	80	download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
	81	# We get the dictionary of the track from some javascript code
	82	all_info = self._parse_json(self._search_regex(
	83	r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id)
	84	info = all_info[0]
	85	# We pick mp3-320 for now, until format selection can be easily implemented.
	86	mp3_info = info['downloads']['mp3-320']
	87	# If we try to use this url it says the link has expired
	88	initial_url = mp3_info['url']
	89	m_url = re.match(
	90	r'(?P<server>http://(.?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.?)&id=(?P<id>.?)&ts=(?P<ts>.)$',
	91	initial_url)
	92	# We build the url we will use to get the final track url
	93	# This url is build in Bandcamp in the script download_bunde_*.js
	94	request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
	95	final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
	96	# If we could correctly generate the .rand field the url would be
	97	# in the "download_url" key
	98	final_url = self._proto_relative_url(self._search_regex(
	99	r'"retry_url":"(.+?)"', final_url_webpage, 'final video URL'), 'http:')
	100
	101	return {
	102	'id': video_id,
	103	'title': info['title'],
	104	'ext': 'mp3',
	105	'vcodec': 'none',
	106	'url': final_url,
	107	'thumbnail': info.get('thumb_url'),
	108	'uploader': info.get('artist'),
	109	}
	110
	111
	112	class BandcampAlbumIE(InfoExtractor):
	113	IE_NAME = 'Bandcamp:album'
	114	_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)\|/?(?:$\|[?#]))'
	115
	116	_TESTS = [{
	117	'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
	118	'playlist': [
	119	{
	120	'md5': '39bc1eded3476e927c724321ddf116cf',
	121	'info_dict': {
	122	'id': '1353101989',
	123	'ext': 'mp3',
	124	'title': 'Intro',
	125	}
	126	},
	127	{
	128	'md5': '1a2c32e2691474643e912cc6cd4bffaa',
	129	'info_dict': {
	130	'id': '38097443',
	131	'ext': 'mp3',
	132	'title': 'Kero One - Keep It Alive (Blazo remix)',
	133	}
	134	},
	135	],
	136	'info_dict': {
	137	'title': 'Jazz Format Mixtape vol.1',
	138	'id': 'jazz-format-mixtape-vol-1',
	139	'uploader_id': 'blazo',
	140	},
	141	'params': {
	142	'playlistend': 2
	143	},
	144	'skip': 'Bandcamp imposes download limits.'
	145	}, {
	146	'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
	147	'info_dict': {
	148	'title': 'Hierophany of the Open Grave',
	149	'uploader_id': 'nightbringer',
	150	'id': 'hierophany-of-the-open-grave',
	151	},
	152	'playlist_mincount': 9,
	153	}, {
	154	'url': 'http://dotscale.bandcamp.com',
	155	'info_dict': {
	156	'title': 'Loom',
	157	'id': 'dotscale',
	158	'uploader_id': 'dotscale',
	159	},
	160	'playlist_mincount': 7,
	161	}]
	162
	163	def _real_extract(self, url):
	164	mobj = re.match(self._VALID_URL, url)
	165	uploader_id = mobj.group('subdomain')
	166	album_id = mobj.group('album_id')
	167	playlist_id = album_id or uploader_id
	168	webpage = self._download_webpage(url, playlist_id)
	169	tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
	170	if not tracks_paths:
	171	raise ExtractorError('The page doesn\'t contain any tracks')
	172	entries = [
	173	self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
	174	for t_path in tracks_paths]
	175	title = self._search_regex(
	176	r'album_title\s:\s"(.*?)"', webpage, 'title', fatal=False)
	177	return {
	178	'_type': 'playlist',
	179	'uploader_id': uploader_id,
	180	'id': playlist_id,
	181	'title': title,
	182	'entries': entries,
	183	}