jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	from __future__ import unicode_literals
	2
	3	import json
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..compat import (
	8	compat_str,
	9	compat_urlparse,
	10	)
	11	from ..utils import (
	12	ExtractorError,
	13	)
	14
	15
	16	class BandcampIE(InfoExtractor):
	17	_VALID_URL = r'https?://.?\.bandcamp\.com/track/(?P<title>.)'
	18	_TESTS = [{
	19	'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
	20	'md5': 'c557841d5e50261777a6585648adf439',
	21	'info_dict': {
	22	'id': '1812978515',
	23	'ext': 'mp3',
	24	'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
	25	'duration': 9.8485,
	26	},
	27	'_skip': 'There is a limit of 200 free downloads / month for the test song'
	28	}, {
	29	'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
	30	'md5': '2b68e5851514c20efdff2afc5603b8b4',
	31	'info_dict': {
	32	'id': '2650410135',
	33	'ext': 'mp3',
	34	'title': 'Lanius (Battle)',
	35	'uploader': 'Ben Prunty Music',
	36	},
	37	}]
	38
	39	def _real_extract(self, url):
	40	mobj = re.match(self._VALID_URL, url)
	41	title = mobj.group('title')
	42	webpage = self._download_webpage(url, title)
	43	m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
	44	if not m_download:
	45	m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
	46	if m_trackinfo:
	47	json_code = m_trackinfo.group(1)
	48	data = json.loads(json_code)[0]
	49
	50	formats = []
	51	for format_id, format_url in data['file'].items():
	52	ext, abr_str = format_id.split('-', 1)
	53	formats.append({
	54	'format_id': format_id,
	55	'url': format_url,
	56	'ext': ext,
	57	'vcodec': 'none',
	58	'acodec': ext,
	59	'abr': int(abr_str),
	60	})
	61
	62	self._sort_formats(formats)
	63
	64	return {
	65	'id': compat_str(data['id']),
	66	'title': data['title'],
	67	'formats': formats,
	68	'duration': float(data['duration']),
	69	}
	70	else:
	71	raise ExtractorError('No free songs found')
	72
	73	download_link = m_download.group(1)
	74	video_id = self._search_regex(
	75	r'(?ms)var TralbumData = .?[{,]\sid: (?P<id>\d+),?$',
	76	webpage, 'video id')
	77
	78	download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
	79	# We get the dictionary of the track from some javascript code
	80	all_info = self._parse_json(self._search_regex(
	81	r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id)
	82	info = all_info[0]
	83	# We pick mp3-320 for now, until format selection can be easily implemented.
	84	mp3_info = info['downloads']['mp3-320']
	85	# If we try to use this url it says the link has expired
	86	initial_url = mp3_info['url']
	87	m_url = re.match(
	88	r'(?P<server>http://(.?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.?)&id=(?P<id>.?)&ts=(?P<ts>.)$',
	89	initial_url)
	90	# We build the url we will use to get the final track url
	91	# This url is build in Bandcamp in the script download_bunde_*.js
	92	request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
	93	final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
	94	# If we could correctly generate the .rand field the url would be
	95	# in the "download_url" key
	96	final_url = self._search_regex(
	97	r'"retry_url":"(.*?)"', final_url_webpage, 'final video URL')
	98
	99	return {
	100	'id': video_id,
	101	'title': info['title'],
	102	'ext': 'mp3',
	103	'vcodec': 'none',
	104	'url': final_url,
	105	'thumbnail': info.get('thumb_url'),
	106	'uploader': info.get('artist'),
	107	}
	108
	109
	110	class BandcampAlbumIE(InfoExtractor):
	111	IE_NAME = 'Bandcamp:album'
	112	_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)\|/?(?:$\|[?#]))'
	113
	114	_TESTS = [{
	115	'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
	116	'playlist': [
	117	{
	118	'md5': '39bc1eded3476e927c724321ddf116cf',
	119	'info_dict': {
	120	'id': '1353101989',
	121	'ext': 'mp3',
	122	'title': 'Intro',
	123	}
	124	},
	125	{
	126	'md5': '1a2c32e2691474643e912cc6cd4bffaa',
	127	'info_dict': {
	128	'id': '38097443',
	129	'ext': 'mp3',
	130	'title': 'Kero One - Keep It Alive (Blazo remix)',
	131	}
	132	},
	133	],
	134	'info_dict': {
	135	'title': 'Jazz Format Mixtape vol.1',
	136	'id': 'jazz-format-mixtape-vol-1',
	137	'uploader_id': 'blazo',
	138	},
	139	'params': {
	140	'playlistend': 2
	141	},
	142	'skip': 'Bandcamp imposes download limits.'
	143	}, {
	144	'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
	145	'info_dict': {
	146	'title': 'Hierophany of the Open Grave',
	147	'uploader_id': 'nightbringer',
	148	'id': 'hierophany-of-the-open-grave',
	149	},
	150	'playlist_mincount': 9,
	151	}, {
	152	'url': 'http://dotscale.bandcamp.com',
	153	'info_dict': {
	154	'title': 'Loom',
	155	'id': 'dotscale',
	156	'uploader_id': 'dotscale',
	157	},
	158	'playlist_mincount': 7,
	159	}]
	160
	161	def _real_extract(self, url):
	162	mobj = re.match(self._VALID_URL, url)
	163	uploader_id = mobj.group('subdomain')
	164	album_id = mobj.group('album_id')
	165	playlist_id = album_id or uploader_id
	166	webpage = self._download_webpage(url, playlist_id)
	167	tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
	168	if not tracks_paths:
	169	raise ExtractorError('The page doesn\'t contain any tracks')
	170	entries = [
	171	self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
	172	for t_path in tracks_paths]
	173	title = self._search_regex(
	174	r'album_title\s:\s"(.*?)"', webpage, 'title', fatal=False)
	175	return {
	176	'_type': 'playlist',
	177	'uploader_id': uploader_id,
	178	'id': playlist_id,
	179	'title': title,
	180	'entries': entries,
	181	}