jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	import re
	2
	3	from .common import InfoExtractor
	4	from ..utils import (
	5	float_or_none,
	6	get_element_by_class,
	7	get_element_by_id,
	8	unified_strdate,
	9	)
	10
	11
	12	class FreesoundIE(InfoExtractor):
	13	_VALID_URL = r'https?://(?:www\.)?freesound\.org/people/[^/]+/sounds/(?P<id>[^/]+)'
	14	_TEST = {
	15	'url': 'http://www.freesound.org/people/miklovan/sounds/194503/',
	16	'md5': '12280ceb42c81f19a515c745eae07650',
	17	'info_dict': {
	18	'id': '194503',
	19	'ext': 'mp3',
	20	'title': 'gulls in the city.wav',
	21	'description': 'the sounds of seagulls in the city',
	22	'duration': 130.233,
	23	'uploader': 'miklovan',
	24	'upload_date': '20130715',
	25	'tags': list,
	26	}
	27	}
	28
	29	def _real_extract(self, url):
	30	audio_id = self._match_id(url)
	31
	32	webpage = self._download_webpage(url, audio_id)
	33
	34	audio_url = self._og_search_property('audio', webpage, 'song url')
	35	title = self._og_search_property('audio:title', webpage, 'song title')
	36
	37	description = self._html_search_regex(
	38	r'(?s)id=["\']sound_description["\'][^>]*>(.+?)</div>',
	39	webpage, 'description', fatal=False)
	40
	41	duration = float_or_none(
	42	get_element_by_class('duration', webpage), scale=1000)
	43
	44	upload_date = unified_strdate(get_element_by_id('sound_date', webpage))
	45	uploader = self._og_search_property(
	46	'audio:artist', webpage, 'uploader', fatal=False)
	47
	48	channels = self._html_search_regex(
	49	r'Channels</dt><dd>(.+?)</dd>', webpage,
	50	'channels info', fatal=False)
	51
	52	tags_str = get_element_by_class('tags', webpage)
	53	tags = re.findall(r'<a[^>]+>([^<]+)', tags_str) if tags_str else None
	54
	55	audio_url = re.sub(r'^https?://freesound\.org(https?://)', r'\1', audio_url)
	56	audio_urls = [audio_url]
	57
	58	LQ_FORMAT = '-lq.mp3'
	59	if LQ_FORMAT in audio_url:
	60	audio_urls.append(audio_url.replace(LQ_FORMAT, '-hq.mp3'))
	61
	62	formats = [{
	63	'url': format_url,
	64	'format_note': channels,
	65	'quality': quality,
	66	} for quality, format_url in enumerate(audio_urls)]
	67
	68	return {
	69	'id': audio_id,
	70	'title': title,
	71	'description': description,
	72	'duration': duration,
	73	'uploader': uploader,
	74	'upload_date': upload_date,
	75	'tags': tags,
	76	'formats': formats,
	77	}