jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	import re
	2
	3	from .common import InfoExtractor
	4	from ..utils import (
	5	float_or_none,
	6	int_or_none,
	7	merge_dicts,
	8	str_or_none,
	9	str_to_int,
	10	url_or_none,
	11	)
	12
	13
	14	class SpankwireIE(InfoExtractor):
	15	_VALID_URL = r'''(?x)
	16	https?://
	17	(?:www\.)?spankwire\.com/
	18	(?:
	19	[^/]+/video\|
	20	EmbedPlayer\.aspx/?\?.*?\bArticleId=
	21	)
	22	(?P<id>\d+)
	23	'''
	24	_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?spankwire\.com/EmbedPlayer\.aspx/?\?.*?\bArticleId=\d+)']
	25	_TESTS = [{
	26	# download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
	27	'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
	28	'md5': '5aa0e4feef20aad82cbcae3aed7ab7cd',
	29	'info_dict': {
	30	'id': '103545',
	31	'ext': 'mp4',
	32	'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
	33	'description': 'Crazy Bitch X rated music video.',
	34	'duration': 222,
	35	'uploader': 'oreusz',
	36	'uploader_id': '124697',
	37	'timestamp': 1178587885,
	38	'upload_date': '20070508',
	39	'average_rating': float,
	40	'view_count': int,
	41	'comment_count': int,
	42	'age_limit': 18,
	43	'categories': list,
	44	'tags': list,
	45	},
	46	}, {
	47	# download URL pattern: */mp4_<format_id>_<video_id>.mp4
	48	'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/',
	49	'md5': '09b3c20833308b736ae8902db2f8d7e6',
	50	'info_dict': {
	51	'id': '1921551',
	52	'ext': 'mp4',
	53	'title': 'Titcums Compiloation I',
	54	'description': 'cum on tits',
	55	'uploader': 'dannyh78999',
	56	'uploader_id': '3056053',
	57	'upload_date': '20150822',
	58	'age_limit': 18,
	59	},
	60	'params': {
	61	'proxy': '127.0.0.1:8118'
	62	},
	63	'skip': 'removed',
	64	}, {
	65	'url': 'https://www.spankwire.com/EmbedPlayer.aspx/?ArticleId=156156&autostart=true',
	66	'only_matching': True,
	67	}]
	68
	69	def _real_extract(self, url):
	70	video_id = self._match_id(url)
	71
	72	video = self._download_json(
	73	'https://www.spankwire.com/api/video/%s.json' % video_id, video_id)
	74
	75	title = video['title']
	76
	77	formats = []
	78	videos = video.get('videos')
	79	if isinstance(videos, dict):
	80	for format_id, format_url in videos.items():
	81	video_url = url_or_none(format_url)
	82	if not format_url:
	83	continue
	84	height = int_or_none(self._search_regex(
	85	r'(\d+)[pP]', format_id, 'height', default=None))
	86	m = re.search(
	87	r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', video_url)
	88	if m:
	89	tbr = int(m.group('tbr'))
	90	height = height or int(m.group('height'))
	91	else:
	92	tbr = None
	93	formats.append({
	94	'url': video_url,
	95	'format_id': '%dp' % height if height else format_id,
	96	'height': height,
	97	'tbr': tbr,
	98	})
	99	m3u8_url = url_or_none(video.get('HLS'))
	100	if m3u8_url:
	101	formats.extend(self._extract_m3u8_formats(
	102	m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
	103	m3u8_id='hls', fatal=False))
	104
	105	view_count = str_to_int(video.get('viewed'))
	106
	107	thumbnails = []
	108	for preference, t in enumerate(('', '2x'), start=0):
	109	thumbnail_url = url_or_none(video.get('poster%s' % t))
	110	if not thumbnail_url:
	111	continue
	112	thumbnails.append({
	113	'url': thumbnail_url,
	114	'preference': preference,
	115	})
	116
	117	def extract_names(key):
	118	entries_list = video.get(key)
	119	if not isinstance(entries_list, list):
	120	return
	121	entries = []
	122	for entry in entries_list:
	123	name = str_or_none(entry.get('name'))
	124	if name:
	125	entries.append(name)
	126	return entries
	127
	128	categories = extract_names('categories')
	129	tags = extract_names('tags')
	130
	131	uploader = None
	132	info = {}
	133
	134	webpage = self._download_webpage(
	135	'https://www.spankwire.com/_/video%s/' % video_id, video_id,
	136	fatal=False)
	137	if webpage:
	138	info = self._search_json_ld(webpage, video_id, default={})
	139	thumbnail_url = None
	140	if 'thumbnail' in info:
	141	thumbnail_url = url_or_none(info['thumbnail'])
	142	del info['thumbnail']
	143	if not thumbnail_url:
	144	thumbnail_url = self._og_search_thumbnail(webpage)
	145	if thumbnail_url:
	146	thumbnails.append({
	147	'url': thumbnail_url,
	148	'preference': 10,
	149	})
	150	uploader = self._html_search_regex(
	151	r'(?s)by\s<a[^>]+\bclass=["\']uploaded__by[^>]>(.+?)</a>',
	152	webpage, 'uploader', fatal=False)
	153	if not view_count:
	154	view_count = str_to_int(self._search_regex(
	155	r'data-views=["\']([\d,.]+)', webpage, 'view count',
	156	fatal=False))
	157
	158	return merge_dicts({
	159	'id': video_id,
	160	'title': title,
	161	'description': video.get('description'),
	162	'duration': int_or_none(video.get('duration')),
	163	'thumbnails': thumbnails,
	164	'uploader': uploader,
	165	'uploader_id': str_or_none(video.get('userId')),
	166	'timestamp': int_or_none(video.get('time_approved_on')),
	167	'average_rating': float_or_none(video.get('rating')),
	168	'view_count': view_count,
	169	'comment_count': int_or_none(video.get('comments')),
	170	'age_limit': 18,
	171	'categories': categories,
	172	'tags': tags,
	173	'formats': formats,
	174	}, info)