jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..compat import compat_urlparse
	8	from ..utils import (
	9	HEADRequest,
	10	KNOWN_EXTENSIONS,
	11	sanitized_Request,
	12	str_to_int,
	13	urlencode_postdata,
	14	urlhandle_detect_ext,
	15	)
	16
	17
	18	class HearThisAtIE(InfoExtractor):
	19	_VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
	20	_PLAYLIST_URL = 'https://hearthis.at/playlist.php'
	21	_TESTS = [{
	22	'url': 'https://hearthis.at/moofi/dr-kreep',
	23	'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
	24	'info_dict': {
	25	'id': '150939',
	26	'ext': 'wav',
	27	'title': 'Moofi - Dr. Kreep',
	28	'thumbnail': r're:^https?://.*\.jpg$',
	29	'timestamp': 1421564134,
	30	'description': 'Listen to Dr. Kreep by Moofi on hearthis.at - Modular, Eurorack, Mutable Intruments Braids, Valhalla-DSP',
	31	'upload_date': '20150118',
	32	'comment_count': int,
	33	'view_count': int,
	34	'like_count': int,
	35	'duration': 71,
	36	'categories': ['Experimental'],
	37	}
	38	}, {
	39	# 'download' link redirects to the original webpage
	40	'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/',
	41	'md5': '5980ceb7c461605d30f1f039df160c6e',
	42	'info_dict': {
	43	'id': '811296',
	44	'ext': 'mp3',
	45	'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!',
	46	'description': 'Listen to DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix! by TwitchSF on hearthis.at - Dance',
	47	'upload_date': '20160328',
	48	'timestamp': 1459186146,
	49	'thumbnail': r're:^https?://.*\.jpg$',
	50	'comment_count': int,
	51	'view_count': int,
	52	'like_count': int,
	53	'duration': 4360,
	54	'categories': ['Dance'],
	55	},
	56	}]
	57
	58	def _real_extract(self, url):
	59	m = re.match(self._VALID_URL, url)
	60	display_id = '{artist:s} - {title:s}'.format(**m.groupdict())
	61
	62	webpage = self._download_webpage(url, display_id)
	63	track_id = self._search_regex(
	64	r'intTrackId\s=\s(\d+)', webpage, 'track ID')
	65
	66	payload = urlencode_postdata({'tracks[]': track_id})
	67	req = sanitized_Request(self._PLAYLIST_URL, payload)
	68	req.add_header('Content-type', 'application/x-www-form-urlencoded')
	69
	70	track = self._download_json(req, track_id, 'Downloading playlist')[0]
	71	title = '{artist:s} - {title:s}'.format(**track)
	72
	73	categories = None
	74	if track.get('category'):
	75	categories = [track['category']]
	76
	77	description = self._og_search_description(webpage)
	78	thumbnail = self._og_search_thumbnail(webpage)
	79
	80	meta_span = r'<span[^>]+class="%s".*?</i>([^<]+)</span>'
	81	view_count = str_to_int(self._search_regex(
	82	meta_span % 'plays_count', webpage, 'view count', fatal=False))
	83	like_count = str_to_int(self._search_regex(
	84	meta_span % 'likes_count', webpage, 'like count', fatal=False))
	85	comment_count = str_to_int(self._search_regex(
	86	meta_span % 'comment_count', webpage, 'comment count', fatal=False))
	87	duration = str_to_int(self._search_regex(
	88	r'data-length="(\d+)', webpage, 'duration', fatal=False))
	89	timestamp = str_to_int(self._search_regex(
	90	r'<span[^>]+class="calctime"[^>]+data-time="(\d+)', webpage, 'timestamp', fatal=False))
	91
	92	formats = []
	93	mp3_url = self._search_regex(
	94	r'(?s)<a class="player-link"\s+(?:[a-zA-Z0-9_:-]+="[^"]+"\s+)*?data-mp3="([^"]+)"',
	95	webpage, 'mp3 URL', fatal=False)
	96	if mp3_url:
	97	formats.append({
	98	'format_id': 'mp3',
	99	'vcodec': 'none',
	100	'acodec': 'mp3',
	101	'url': mp3_url,
	102	})
	103	download_path = self._search_regex(
	104	r'<a class="[^"]download_fct[^"]"\s+href="([^"]+)"',
	105	webpage, 'download URL', default=None)
	106	if download_path:
	107	download_url = compat_urlparse.urljoin(url, download_path)
	108	ext_req = HEADRequest(download_url)
	109	ext_handle = self._request_webpage(
	110	ext_req, display_id, note='Determining extension')
	111	ext = urlhandle_detect_ext(ext_handle)
	112	if ext in KNOWN_EXTENSIONS:
	113	formats.append({
	114	'format_id': 'download',
	115	'vcodec': 'none',
	116	'ext': ext,
	117	'url': download_url,
	118	'quality': 2, # Usually better quality
	119	})
	120	self._sort_formats(formats)
	121
	122	return {
	123	'id': track_id,
	124	'display_id': display_id,
	125	'title': title,
	126	'formats': formats,
	127	'thumbnail': thumbnail,
	128	'description': description,
	129	'duration': duration,
	130	'timestamp': timestamp,
	131	'view_count': view_count,
	132	'comment_count': comment_count,
	133	'like_count': like_count,
	134	'categories': categories,
	135	}