jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	import re
	2
	3	from .common import InfoExtractor
	4	from ..utils import (
	5	determine_ext,
	6	int_or_none,
	7	NO_DEFAULT,
	8	str_to_int,
	9	)
	10
	11
	12	class XNXXIE(InfoExtractor):
	13	_VALID_URL = r'https?://(?:video\|www)\.xnxx3?\.com/video-?(?P<id>[0-9a-z]+)/'
	14	_TESTS = [{
	15	'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video',
	16	'md5': '7583e96c15c0f21e9da3453d9920fbba',
	17	'info_dict': {
	18	'id': '55awb78',
	19	'ext': 'mp4',
	20	'title': 'Skyrim Test Video',
	21	'thumbnail': r're:^https?://.*\.jpg',
	22	'duration': 469,
	23	'view_count': int,
	24	'age_limit': 18,
	25	},
	26	}, {
	27	'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
	28	'only_matching': True,
	29	}, {
	30	'url': 'http://www.xnxx.com/video-55awb78/',
	31	'only_matching': True,
	32	}, {
	33	'url': 'http://www.xnxx3.com/video-55awb78/',
	34	'only_matching': True,
	35	}]
	36
	37	def _real_extract(self, url):
	38	video_id = self._match_id(url)
	39
	40	webpage = self._download_webpage(url, video_id)
	41
	42	def get(meta, default=NO_DEFAULT, fatal=True):
	43	return self._search_regex(
	44	r'set%s\s\(\s(["\'])(?P<value>(?:(?!\1).)+)\1' % meta,
	45	webpage, meta, default=default, fatal=fatal, group='value')
	46
	47	title = self._og_search_title(
	48	webpage, default=None) or get('VideoTitle')
	49
	50	formats = []
	51	for mobj in re.finditer(
	52	r'setVideo(?:Url(?P<id>Low\|High)\|HLS)\s\(\s(?P<q>["\'])(?P<url>(?:https?:)?//.+?)(?P=q)', webpage):
	53	format_url = mobj.group('url')
	54	if determine_ext(format_url) == 'm3u8':
	55	formats.extend(self._extract_m3u8_formats(
	56	format_url, video_id, 'mp4', entry_protocol='m3u8_native',
	57	quality=1, m3u8_id='hls', fatal=False))
	58	else:
	59	format_id = mobj.group('id')
	60	if format_id:
	61	format_id = format_id.lower()
	62	formats.append({
	63	'url': format_url,
	64	'format_id': format_id,
	65	'quality': -1 if format_id == 'low' else 0,
	66	})
	67
	68	thumbnail = self._og_search_thumbnail(webpage, default=None) or get(
	69	'ThumbUrl', fatal=False) or get('ThumbUrl169', fatal=False)
	70	duration = int_or_none(self._og_search_property('duration', webpage))
	71	view_count = str_to_int(self._search_regex(
	72	r'id=["\']nb-views-number[^>]+>([\d,.]+)', webpage, 'view count',
	73	default=None))
	74
	75	return {
	76	'id': video_id,
	77	'title': title,
	78	'thumbnail': thumbnail,
	79	'duration': duration,
	80	'view_count': view_count,
	81	'age_limit': 18,
	82	'formats': formats,
	83	}