jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	from __future__ import unicode_literals
	2
	3	from .common import InfoExtractor
	4	from ..utils import (
	5	ExtractorError,
	6	int_or_none,
	7	str_to_int,
	8	unified_strdate,
	9	)
	10
	11
	12	class RedTubeIE(InfoExtractor):
	13	_VALID_URL = r'https?://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
	14	_TEST = {
	15	'url': 'http://www.redtube.com/66418',
	16	'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
	17	'info_dict': {
	18	'id': '66418',
	19	'ext': 'mp4',
	20	'title': 'Sucked on a toilet',
	21	'upload_date': '20120831',
	22	'duration': 596,
	23	'view_count': int,
	24	'age_limit': 18,
	25	}
	26	}
	27
	28	def _real_extract(self, url):
	29	video_id = self._match_id(url)
	30	webpage = self._download_webpage(url, video_id)
	31
	32	if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
	33	raise ExtractorError('Video %s has been removed' % video_id, expected=True)
	34
	35	title = self._html_search_regex(
	36	(r'<h1 class="videoTitle[^"]*">(?P<title>.+?)</h1>',
	37	r'videoTitle\s:\s(["\'])(?P<title>)\1'),
	38	webpage, 'title', group='title')
	39
	40	formats = []
	41	sources = self._parse_json(
	42	self._search_regex(
	43	r'sources\s:\s({.+?})', webpage, 'source', default='{}'),
	44	video_id, fatal=False)
	45	if sources and isinstance(sources, dict):
	46	for format_id, format_url in sources.items():
	47	if format_url:
	48	formats.append({
	49	'url': format_url,
	50	'format_id': format_id,
	51	'height': int_or_none(format_id),
	52	})
	53	else:
	54	video_url = self._html_search_regex(
	55	r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
	56	formats.append({'url': video_url})
	57	self._sort_formats(formats)
	58
	59	thumbnail = self._og_search_thumbnail(webpage)
	60	upload_date = unified_strdate(self._search_regex(
	61	r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
	62	webpage, 'upload date', fatal=False))
	63	duration = int_or_none(self._search_regex(
	64	r'videoDuration\s:\s(\d+)', webpage, 'duration', fatal=False))
	65	view_count = str_to_int(self._search_regex(
	66	r'<span[^>]>VIEWS</span></td>\s<td>([\d,.]+)',
	67	webpage, 'view count', fatal=False))
	68
	69	# No self-labeling, but they describe themselves as
	70	# "Home of Videos Porno"
	71	age_limit = 18
	72
	73	return {
	74	'id': video_id,
	75	'ext': 'mp4',
	76	'title': title,
	77	'thumbnail': thumbnail,
	78	'upload_date': upload_date,
	79	'duration': duration,
	80	'view_count': view_count,
	81	'age_limit': age_limit,
	82	'formats': formats,
	83	}