jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	import re
	2
	3	from .common import InfoExtractor
	4	from ..utils import (
	5	parse_resolution,
	6	str_to_int,
	7	unified_strdate,
	8	urlencode_postdata,
	9	urljoin,
	10	)
	11
	12
	13	class RadioJavanIE(InfoExtractor):
	14	_VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
	15	_TEST = {
	16	'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
	17	'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
	18	'info_dict': {
	19	'id': 'chaartaar-ashoobam',
	20	'ext': 'mp4',
	21	'title': 'Chaartaar - Ashoobam',
	22	'thumbnail': r're:^https?://.*\.jpe?g$',
	23	'upload_date': '20150215',
	24	'view_count': int,
	25	'like_count': int,
	26	'dislike_count': int,
	27	}
	28	}
	29
	30	def _real_extract(self, url):
	31	video_id = self._match_id(url)
	32
	33	download_host = self._download_json(
	34	'https://www.radiojavan.com/videos/video_host', video_id,
	35	data=urlencode_postdata({'id': video_id}),
	36	headers={
	37	'Content-Type': 'application/x-www-form-urlencoded',
	38	'Referer': url,
	39	}).get('host', 'https://host1.rjmusicmedia.com')
	40
	41	webpage = self._download_webpage(url, video_id)
	42
	43	formats = []
	44	for format_id, _, video_path in re.findall(
	45	r'RJ\.video(?P<format_id>\d+[pPkK])\s=\s(["\'])(?P<url>(?:(?!\2).)+)\2',
	46	webpage):
	47	f = parse_resolution(format_id)
	48	f.update({
	49	'url': urljoin(download_host, video_path),
	50	'format_id': format_id,
	51	})
	52	formats.append(f)
	53
	54	title = self._og_search_title(webpage)
	55	thumbnail = self._og_search_thumbnail(webpage)
	56
	57	upload_date = unified_strdate(self._search_regex(
	58	r'class="date_added">Date added: ([^<]+)<',
	59	webpage, 'upload date', fatal=False))
	60
	61	view_count = str_to_int(self._search_regex(
	62	r'class="views">Plays: ([\d,]+)',
	63	webpage, 'view count', fatal=False))
	64	like_count = str_to_int(self._search_regex(
	65	r'class="rating">([\d,]+) likes',
	66	webpage, 'like count', fatal=False))
	67	dislike_count = str_to_int(self._search_regex(
	68	r'class="rating">([\d,]+) dislikes',
	69	webpage, 'dislike count', fatal=False))
	70
	71	return {
	72	'id': video_id,
	73	'title': title,
	74	'thumbnail': thumbnail,
	75	'upload_date': upload_date,
	76	'view_count': view_count,
	77	'like_count': like_count,
	78	'dislike_count': dislike_count,
	79	'formats': formats,
	80	}