jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	from .common import InfoExtractor
	2	from ..utils import (
	3	determine_ext,
	4	ExtractorError,
	5	int_or_none,
	6	merge_dicts,
	7	str_to_int,
	8	unified_strdate,
	9	url_or_none,
	10	)
	11
	12
	13	class RedTubeIE(InfoExtractor):
	14	_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/\|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
	15	_EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)']
	16	_TESTS = [{
	17	'url': 'https://www.redtube.com/38864951',
	18	'md5': '4fba70cbca3aefd25767ab4b523c9878',
	19	'info_dict': {
	20	'id': '38864951',
	21	'ext': 'mp4',
	22	'title': 'Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu',
	23	'description': 'Watch video Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu on Redtube, home of free Blowjob porn videos and Blonde sex movies online. Video length: (10:46) - Uploaded by leolulu - Verified User - Starring Pornstar: Leolulu',
	24	'upload_date': '20210111',
	25	'timestamp': 1610343109,
	26	'duration': 646,
	27	'view_count': int,
	28	'age_limit': 18,
	29	'thumbnail': r're:https://\wi-ph\.rdtcdn\.com/videos/.+/.+\.jpg',
	30	},
	31	}, {
	32	'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
	33	'only_matching': True,
	34	}, {
	35	'url': 'http://it.redtube.com/66418',
	36	'only_matching': True,
	37	}]
	38
	39	def _real_extract(self, url):
	40	video_id = self._match_id(url)
	41	webpage = self._download_webpage(
	42	'http://www.redtube.com/%s' % video_id, video_id)
	43
	44	ERRORS = (
	45	(('video-deleted-info', '>This video has been removed'), 'has been removed'),
	46	(('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'),
	47	)
	48
	49	for patterns, message in ERRORS:
	50	if any(p in webpage for p in patterns):
	51	raise ExtractorError(
	52	'Video %s %s' % (video_id, message), expected=True)
	53
	54	info = self._search_json_ld(webpage, video_id, default={})
	55
	56	if not info.get('title'):
	57	info['title'] = self._html_search_regex(
	58	(r'<h(\d)[^>]+class="(?:video_title_text\|videoTitle\|video_title)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
	59	r'(?:videoTitle\|title)\s:\s(["\'])(?P<title>(?:(?!\1).)+)\1',),
	60	webpage, 'title', group='title',
	61	default=None) or self._og_search_title(webpage)
	62
	63	formats = []
	64	sources = self._parse_json(
	65	self._search_regex(
	66	r'sources\s:\s({.+?})', webpage, 'source', default='{}'),
	67	video_id, fatal=False)
	68	if sources and isinstance(sources, dict):
	69	for format_id, format_url in sources.items():
	70	if format_url:
	71	formats.append({
	72	'url': format_url,
	73	'format_id': format_id,
	74	'height': int_or_none(format_id),
	75	})
	76	medias = self._parse_json(
	77	self._search_regex(
	78	r'mediaDefinition["\']?\s:\s(\[.+?}\s*\])', webpage,
	79	'media definitions', default='{}'),
	80	video_id, fatal=False)
	81	for media in medias if isinstance(medias, list) else []:
	82	format_url = url_or_none(media.get('videoUrl'))
	83	if not format_url:
	84	continue
	85	format_id = media.get('format')
	86	quality = media.get('quality')
	87	if format_id == 'hls' or (format_id == 'mp4' and not quality):
	88	more_media = self._download_json(format_url, video_id, fatal=False)
	89	else:
	90	more_media = [media]
	91	for media in more_media if isinstance(more_media, list) else []:
	92	format_url = url_or_none(media.get('videoUrl'))
	93	if not format_url:
	94	continue
	95	format_id = media.get('format')
	96	if format_id == 'hls' or determine_ext(format_url) == 'm3u8':
	97	formats.extend(self._extract_m3u8_formats(
	98	format_url, video_id, 'mp4',
	99	entry_protocol='m3u8_native', m3u8_id=format_id or 'hls',
	100	fatal=False))
	101	continue
	102	format_id = media.get('quality')
	103	formats.append({
	104	'url': format_url,
	105	'ext': 'mp4',
	106	'format_id': format_id,
	107	'height': int_or_none(format_id),
	108	})
	109	if not formats:
	110	video_url = self._html_search_regex(
	111	r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
	112	formats.append({'url': video_url, 'ext': 'mp4'})
	113
	114	thumbnail = self._og_search_thumbnail(webpage)
	115	upload_date = unified_strdate(self._search_regex(
	116	r'<span[^>]+>(?:ADDED\|Published on) ([^<]+)<',
	117	webpage, 'upload date', default=None))
	118	duration = int_or_none(self._og_search_property(
	119	'video:duration', webpage, default=None) or self._search_regex(
	120	r'videoDuration\s:\s(\d+)', webpage, 'duration', default=None))
	121	view_count = str_to_int(self._search_regex(
	122	(r'<div[^>]>Views</div>\s<div[^>]>\s([\d,.]+)',
	123	r'<span[^>]>VIEWS</span>\s</td>\s<td>\s([\d,.]+)',
	124	r'<span[^>]+\bclass=["\']video_view_count[^>]>\s([\d,.]+)'),
	125	webpage, 'view count', default=None))
	126
	127	# No self-labeling, but they describe themselves as
	128	# "Home of Videos Porno"
	129	age_limit = 18
	130
	131	return merge_dicts(info, {
	132	'id': video_id,
	133	'ext': 'mp4',
	134	'thumbnail': thumbnail,
	135	'upload_date': upload_date,
	136	'duration': duration,
	137	'view_count': view_count,
	138	'age_limit': age_limit,
	139	'formats': formats,
	140	})