jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	from __future__ import unicode_literals
	2
	3	import re
	4
	5	from .common import InfoExtractor
	6	from ..utils import (
	7	ExtractorError,
	8	int_or_none,
	9	float_or_none,
	10	unescapeHTML,
	11	)
	12
	13
	14	class WistiaIE(InfoExtractor):
	15	_VALID_URL = r'(?:wistia:\|https?://(?:fast\.)?wistia\.(?:net\|com)/embed/(?:iframe\|medias)/)(?P<id>[a-z0-9]{10})'
	16	_EMBED_BASE_URL = 'http://fast.wistia.com/embed/'
	17
	18	_TESTS = [{
	19	'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
	20	'md5': 'cafeb56ec0c53c18c97405eecb3133df',
	21	'info_dict': {
	22	'id': 'sh7fpupwlt',
	23	'ext': 'mov',
	24	'title': 'Being Resourceful',
	25	'description': 'a Clients From Hell Video Series video from worldwidewebhosting',
	26	'upload_date': '20131204',
	27	'timestamp': 1386185018,
	28	'duration': 117,
	29	},
	30	}, {
	31	'url': 'wistia:sh7fpupwlt',
	32	'only_matching': True,
	33	}, {
	34	# with hls video
	35	'url': 'wistia:807fafadvk',
	36	'only_matching': True,
	37	}, {
	38	'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
	39	'only_matching': True,
	40	}, {
	41	'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
	42	'only_matching': True,
	43	}]
	44
	45	# https://wistia.com/support/embed-and-share/video-on-your-website
	46	@staticmethod
	47	def _extract_url(webpage):
	48	urls = WistiaIE._extract_urls(webpage)
	49	return urls[0] if urls else None
	50
	51	@staticmethod
	52	def _extract_urls(webpage):
	53	urls = []
	54	for match in re.finditer(
	55	r'<(?:meta[^>]+?content\|(?:iframe\|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net\|com)/embed/(?:iframe\|medias)/[a-z0-9]{10})', webpage):
	56	urls.append(unescapeHTML(match.group('url')))
	57	for match in re.finditer(
	58	r'''(?sx)
	59	<div[^>]+class=(["'])(?:(?!\1).)?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)?\1
	60	''', webpage):
	61	urls.append('wistia:%s' % match.group('id'))
	62	for match in re.finditer(r'(?:data-wistia-?id=["\']\|Wistia\.embed\(["\']\|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
	63	urls.append('wistia:%s' % match.group('id'))
	64	return urls
	65
	66	def _real_extract(self, url):
	67	video_id = self._match_id(url)
	68
	69	data_json = self._download_json(
	70	self._EMBED_BASE_URL + 'medias/%s.json' % video_id, video_id,
	71	# Some videos require this.
	72	headers={
	73	'Referer': url if url.startswith('http') else self._EMBED_BASE_URL + 'iframe/' + video_id,
	74	})
	75
	76	if data_json.get('error'):
	77	raise ExtractorError(
	78	'Error while getting the playlist', expected=True)
	79
	80	data = data_json['media']
	81	title = data['name']
	82
	83	formats = []
	84	thumbnails = []
	85	for a in data['assets']:
	86	aurl = a.get('url')
	87	if not aurl:
	88	continue
	89	astatus = a.get('status')
	90	atype = a.get('type')
	91	if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'):
	92	continue
	93	elif atype in ('still', 'still_image'):
	94	thumbnails.append({
	95	'url': aurl,
	96	'width': int_or_none(a.get('width')),
	97	'height': int_or_none(a.get('height')),
	98	'filesize': int_or_none(a.get('size')),
	99	})
	100	else:
	101	aext = a.get('ext')
	102	display_name = a.get('display_name')
	103	format_id = atype
	104	if atype and atype.endswith('_video') and display_name:
	105	format_id = '%s-%s' % (atype[:-6], display_name)
	106	f = {
	107	'format_id': format_id,
	108	'url': aurl,
	109	'tbr': int_or_none(a.get('bitrate')) or None,
	110	'preference': 1 if atype == 'original' else None,
	111	}
	112	if display_name == 'Audio':
	113	f.update({
	114	'vcodec': 'none',
	115	})
	116	else:
	117	f.update({
	118	'width': int_or_none(a.get('width')),
	119	'height': int_or_none(a.get('height')),
	120	'vcodec': a.get('codec'),
	121	})
	122	if a.get('container') == 'm3u8' or aext == 'm3u8':
	123	ts_f = f.copy()
	124	ts_f.update({
	125	'ext': 'ts',
	126	'format_id': f['format_id'].replace('hls-', 'ts-'),
	127	'url': f['url'].replace('.bin', '.ts'),
	128	})
	129	formats.append(ts_f)
	130	f.update({
	131	'ext': 'mp4',
	132	'protocol': 'm3u8_native',
	133	})
	134	else:
	135	f.update({
	136	'container': a.get('container'),
	137	'ext': aext,
	138	'filesize': int_or_none(a.get('size')),
	139	})
	140	formats.append(f)
	141
	142	self._sort_formats(formats)
	143
	144	subtitles = {}
	145	for caption in data.get('captions', []):
	146	language = caption.get('language')
	147	if not language:
	148	continue
	149	subtitles[language] = [{
	150	'url': self._EMBED_BASE_URL + 'captions/' + video_id + '.vtt?language=' + language,
	151	}]
	152
	153	return {
	154	'id': video_id,
	155	'title': title,
	156	'description': data.get('seoDescription'),
	157	'formats': formats,
	158	'thumbnails': thumbnails,
	159	'duration': float_or_none(data.get('duration')),
	160	'timestamp': int_or_none(data.get('createdAt')),
	161	'subtitles': subtitles,
	162	}