jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# encoding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5	import os
	6	import re
	7
	8	from .common import InfoExtractor
	9	from .youtube import YoutubeIE
	10	from ..utils import (
	11	compat_urllib_error,
	12	compat_urllib_parse,
	13	compat_urllib_request,
	14	compat_urlparse,
	15	compat_xml_parse_error,
	16
	17	ExtractorError,
	18	float_or_none,
	19	HEADRequest,
	20	orderedSet,
	21	parse_xml,
	22	smuggle_url,
	23	unescapeHTML,
	24	unified_strdate,
	25	url_basename,
	26	)
	27	from .brightcove import BrightcoveIE
	28	from .ooyala import OoyalaIE
	29	from .rutv import RUTVIE
	30	from .smotri import SmotriIE
	31
	32
	33	class GenericIE(InfoExtractor):
	34	IE_DESC = 'Generic downloader that works on some sites'
	35	_VALID_URL = r'.*'
	36	IE_NAME = 'generic'
	37	_TESTS = [
	38	{
	39	'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
	40	'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
	41	'info_dict': {
	42	'id': '13601338388002',
	43	'ext': 'mp4',
	44	'uploader': 'www.hodiho.fr',
	45	'title': 'R\u00e9gis plante sa Jeep',
	46	}
	47	},
	48	# bandcamp page with custom domain
	49	{
	50	'add_ie': ['Bandcamp'],
	51	'url': 'http://bronyrock.com/track/the-pony-mash',
	52	'info_dict': {
	53	'id': '3235767654',
	54	'ext': 'mp3',
	55	'title': 'The Pony Mash',
	56	'uploader': 'M_Pallante',
	57	},
	58	'skip': 'There is a limit of 200 free downloads / month for the test song',
	59	},
	60	# embedded brightcove video
	61	# it also tests brightcove videos that need to set the 'Referer' in the
	62	# http requests
	63	{
	64	'add_ie': ['Brightcove'],
	65	'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
	66	'info_dict': {
	67	'id': '2765128793001',
	68	'ext': 'mp4',
	69	'title': 'Le cours de bourse : l’analyse technique',
	70	'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
	71	'uploader': 'BFM BUSINESS',
	72	},
	73	'params': {
	74	'skip_download': True,
	75	},
	76	},
	77	{
	78	# https://github.com/rg3/youtube-dl/issues/2253
	79	'url': 'http://bcove.me/i6nfkrc3',
	80	'md5': '0ba9446db037002366bab3b3eb30c88c',
	81	'info_dict': {
	82	'id': '3101154703001',
	83	'ext': 'mp4',
	84	'title': 'Still no power',
	85	'uploader': 'thestar.com',
	86	'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
	87	},
	88	'add_ie': ['Brightcove'],
	89	},
	90	{
	91	'url': 'http://www.championat.com/video/football/v/87/87499.html',
	92	'md5': 'fb973ecf6e4a78a67453647444222983',
	93	'info_dict': {
	94	'id': '3414141473001',
	95	'ext': 'mp4',
	96	'title': 'Видео. Удаление Дзагоева (ЦСКА)',
	97	'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
	98	'uploader': 'Championat',
	99	},
	100	},
	101	# Direct link to a video
	102	{
	103	'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
	104	'md5': '67d406c2bcb6af27fa886f31aa934bbe',
	105	'info_dict': {
	106	'id': 'trailer',
	107	'ext': 'mp4',
	108	'title': 'trailer',
	109	'upload_date': '20100513',
	110	}
	111	},
	112	# ooyala video
	113	{
	114	'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
	115	'md5': '5644c6ca5d5782c1d0d350dad9bd840c',
	116	'info_dict': {
	117	'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
	118	'ext': 'mp4',
	119	'title': '2cc213299525360.mov', # that's what we get
	120	},
	121	},
	122	# google redirect
	123	{
	124	'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
	125	'info_dict': {
	126	'id': 'cmQHVoWB5FY',
	127	'ext': 'mp4',
	128	'upload_date': '20130224',
	129	'uploader_id': 'TheVerge',
	130	'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.',
	131	'uploader': 'The Verge',
	132	'title': 'First Firefox OS phones side-by-side',
	133	},
	134	'params': {
	135	'skip_download': False,
	136	}
	137	},
	138	# embed.ly video
	139	{
	140	'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
	141	'info_dict': {
	142	'id': '9ODmcdjQcHQ',
	143	'ext': 'mp4',
	144	'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
	145	'upload_date': '20140225',
	146	'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
	147	'uploader': 'Tested',
	148	'uploader_id': 'testedcom',
	149	},
	150	# No need to test YoutubeIE here
	151	'params': {
	152	'skip_download': True,
	153	},
	154	},
	155	# funnyordie embed
	156	{
	157	'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
	158	'md5': '7cf780be104d40fea7bae52eed4a470e',
	159	'info_dict': {
	160	'id': '18e820ec3f',
	161	'ext': 'mp4',
	162	'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
	163	'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
	164	},
	165	},
	166	# RUTV embed
	167	{
	168	'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
	169	'info_dict': {
	170	'id': '776940',
	171	'ext': 'mp4',
	172	'title': 'Охотское море стало целиком российским',
	173	'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
	174	},
	175	'params': {
	176	# m3u8 download
	177	'skip_download': True,
	178	},
	179	},
	180	# Embedded TED video
	181	{
	182	'url': 'http://en.support.wordpress.com/videos/ted-talks/',
	183	'md5': 'deeeabcc1085eb2ba205474e7235a3d5',
	184	'info_dict': {
	185	'id': '981',
	186	'ext': 'mp4',
	187	'title': 'My web playroom',
	188	'uploader': 'Ze Frank',
	189	'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
	190	}
	191	},
	192	# Embedded Ustream video
	193	{
	194	'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
	195	'md5': '27b99cdb639c9b12a79bca876a073417',
	196	'info_dict': {
	197	'id': '45734260',
	198	'ext': 'flv',
	199	'uploader': 'AU SPA: The NSA and Privacy',
	200	'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
	201	}
	202	},
	203	# nowvideo embed hidden behind percent encoding
	204	{
	205	'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
	206	'md5': '2baf4ddd70f697d94b1c18cf796d5107',
	207	'info_dict': {
	208	'id': '06e53103ca9aa',
	209	'ext': 'flv',
	210	'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
	211	'description': 'No description',
	212	},
	213	},
	214	# arte embed
	215	{
	216	'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
	217	'md5': '7653032cbb25bf6c80d80f217055fa43',
	218	'info_dict': {
	219	'id': '048195-004_PLUS7-F',
	220	'ext': 'flv',
	221	'title': 'X:enius',
	222	'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
	223	'upload_date': '20140320',
	224	},
	225	'params': {
	226	'skip_download': 'Requires rtmpdump'
	227	}
	228	},
	229	# smotri embed
	230	{
	231	'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml',
	232	'md5': 'ec40048448e9284c9a1de77bb188108b',
	233	'info_dict': {
	234	'id': 'v27008541fad',
	235	'ext': 'mp4',
	236	'title': 'Крым и Севастополь вошли в состав России',
	237	'description': 'md5:fae01b61f68984c7bd2fa741e11c3175',
	238	'duration': 900,
	239	'upload_date': '20140318',
	240	'uploader': 'rbctv_2012_4',
	241	'uploader_id': 'rbctv_2012_4',
	242	},
	243	},
	244	# Condé Nast embed
	245	{
	246	'url': 'http://www.wired.com/2014/04/honda-asimo/',
	247	'md5': 'ba0dfe966fa007657bd1443ee672db0f',
	248	'info_dict': {
	249	'id': '53501be369702d3275860000',
	250	'ext': 'mp4',
	251	'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
	252	}
	253	},
	254	# Dailymotion embed
	255	{
	256	'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
	257	'md5': '441aeeb82eb72c422c7f14ec533999cd',
	258	'info_dict': {
	259	'id': 'k2mm4bCdJ6CQ2i7c8o2',
	260	'ext': 'mp4',
	261	'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
	262	'uploader': 'Spi0n',
	263	},
	264	'add_ie': ['Dailymotion'],
	265	},
	266	# YouTube embed
	267	{
	268	'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
	269	'info_dict': {
	270	'id': 'FXRb4ykk4S0',
	271	'ext': 'mp4',
	272	'title': 'The NBL Auction 2014',
	273	'uploader': 'BADMINTON England',
	274	'uploader_id': 'BADMINTONEvents',
	275	'upload_date': '20140603',
	276	'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
	277	},
	278	'add_ie': ['Youtube'],
	279	'params': {
	280	'skip_download': True,
	281	}
	282	},
	283	# MTVServices embed
	284	{
	285	'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
	286	'md5': '35727f82f58c76d996fc188f9755b0d5',
	287	'info_dict': {
	288	'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
	289	'ext': 'mp4',
	290	'title': 'Review',
	291	'description': 'Mario\'s life in the fast lane has never looked so good.',
	292	},
	293	},
	294	# YouTube embed via <data-embed-url="">
	295	{
	296	'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
	297	'info_dict': {
	298	'id': 'jpSGZsgga_I',
	299	'ext': 'mp4',
	300	'title': 'Asphalt 8: Airborne - Launch Trailer',
	301	'uploader': 'Gameloft',
	302	'uploader_id': 'gameloft',
	303	'upload_date': '20130821',
	304	'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a',
	305	},
	306	'params': {
	307	'skip_download': True,
	308	}
	309	},
	310	# Camtasia studio
	311	{
	312	'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
	313	'playlist': [{
	314	'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
	315	'info_dict': {
	316	'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
	317	'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
	318	'ext': 'flv',
	319	'duration': 2235.90,
	320	}
	321	}, {
	322	'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
	323	'info_dict': {
	324	'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
	325	'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
	326	'ext': 'flv',
	327	'duration': 2235.93,
	328	}
	329	}],
	330	'info_dict': {
	331	'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
	332	}
	333	}
	334	]
	335
	336	def report_download_webpage(self, video_id):
	337	"""Report webpage download."""
	338	if not self._downloader.params.get('test', False):
	339	self._downloader.report_warning('Falling back on generic information extractor.')
	340	super(GenericIE, self).report_download_webpage(video_id)
	341
	342	def report_following_redirect(self, new_url):
	343	"""Report information extraction."""
	344	self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
	345
	346	def _send_head(self, url):
	347	"""Check if it is a redirect, like url shorteners, in case return the new url."""
	348
	349	class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
	350	"""
	351	Subclass the HTTPRedirectHandler to make it use our
	352	HEADRequest also on the redirected URL
	353	"""
	354	def redirect_request(self, req, fp, code, msg, headers, newurl):
	355	if code in (301, 302, 303, 307):
	356	newurl = newurl.replace(' ', '%20')
	357	newheaders = dict((k,v) for k,v in req.headers.items()
	358	if k.lower() not in ("content-length", "content-type"))
	359	try:
	360	# This function was deprecated in python 3.3 and removed in 3.4
	361	origin_req_host = req.get_origin_req_host()
	362	except AttributeError:
	363	origin_req_host = req.origin_req_host
	364	return HEADRequest(newurl,
	365	headers=newheaders,
	366	origin_req_host=origin_req_host,
	367	unverifiable=True)
	368	else:
	369	raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
	370
	371	class HTTPMethodFallback(compat_urllib_request.BaseHandler):
	372	"""
	373	Fallback to GET if HEAD is not allowed (405 HTTP error)
	374	"""
	375	def http_error_405(self, req, fp, code, msg, headers):
	376	fp.read()
	377	fp.close()
	378
	379	newheaders = dict((k,v) for k,v in req.headers.items()
	380	if k.lower() not in ("content-length", "content-type"))
	381	return self.parent.open(compat_urllib_request.Request(req.get_full_url(),
	382	headers=newheaders,
	383	origin_req_host=req.get_origin_req_host(),
	384	unverifiable=True))
	385
	386	# Build our opener
	387	opener = compat_urllib_request.OpenerDirector()
	388	for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
	389	HTTPMethodFallback, HEADRedirectHandler,
	390	compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
	391	opener.add_handler(handler())
	392
	393	response = opener.open(HEADRequest(url))
	394	if response is None:
	395	raise ExtractorError('Invalid URL protocol')
	396	return response
	397
	398	def _extract_rss(self, url, video_id, doc):
	399	playlist_title = doc.find('./channel/title').text
	400	playlist_desc_el = doc.find('./channel/description')
	401	playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
	402
	403	entries = [{
	404	'_type': 'url',
	405	'url': e.find('link').text,
	406	'title': e.find('title').text,
	407	} for e in doc.findall('./channel/item')]
	408
	409	return {
	410	'_type': 'playlist',
	411	'id': url,
	412	'title': playlist_title,
	413	'description': playlist_desc,
	414	'entries': entries,
	415	}
	416
	417	def _extract_camtasia(self, url, video_id, webpage):
	418	""" Returns None if no camtasia video can be found. """
	419
	420	camtasia_cfg = self._search_regex(
	421	r'fo\.addVariable$\s"csConfigFile",\s"([^"]+)"\s*$;',
	422	webpage, 'camtasia configuration file', default=None)
	423	if camtasia_cfg is None:
	424	return None
	425
	426	title = self._html_search_meta('DC.title', webpage, fatal=True)
	427
	428	camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
	429	camtasia_cfg = self._download_xml(
	430	camtasia_url, video_id,
	431	note='Downloading camtasia configuration',
	432	errnote='Failed to download camtasia configuration')
	433	fileset_node = camtasia_cfg.find('./playlist/array/fileset')
	434
	435	entries = []
	436	for n in fileset_node.getchildren():
	437	url_n = n.find('./uri')
	438	if url_n is None:
	439	continue
	440
	441	entries.append({
	442	'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
	443	'title': '%s - %s' % (title, n.tag),
	444	'url': compat_urlparse.urljoin(url, url_n.text),
	445	'duration': float_or_none(n.find('./duration').text),
	446	})
	447
	448	return {
	449	'_type': 'playlist',
	450	'entries': entries,
	451	'title': title,
	452	}
	453
	454	def _real_extract(self, url):
	455	if url.startswith('//'):
	456	return {
	457	'_type': 'url',
	458	'url': self.http_scheme() + url,
	459	}
	460
	461	parsed_url = compat_urlparse.urlparse(url)
	462	if not parsed_url.scheme:
	463	default_search = self._downloader.params.get('default_search')
	464	if default_search is None:
	465	default_search = 'fixup_error'
	466
	467	if default_search in ('auto', 'auto_warning', 'fixup_error'):
	468	if '/' in url:
	469	self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
	470	return self.url_result('http://' + url)
	471	elif default_search != 'fixup_error':
	472	if default_search == 'auto_warning':
	473	if re.match(r'^(?:url\|URL)$', url):
	474	raise ExtractorError(
	475	'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
	476	expected=True)
	477	else:
	478	self._downloader.report_warning(
	479	'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
	480	return self.url_result('ytsearch:' + url)
	481
	482	if default_search in ('error', 'fixup_error'):
	483	raise ExtractorError(
	484	('%r is not a valid URL. '
	485	'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
	486	) % (url, url), expected=True)
	487	else:
	488	assert ':' in default_search
	489	return self.url_result(default_search + url)
	490	video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
	491
	492	self.to_screen('%s: Requesting header' % video_id)
	493
	494	try:
	495	response = self._send_head(url)
	496
	497	# Check for redirect
	498	new_url = response.geturl()
	499	if url != new_url:
	500	self.report_following_redirect(new_url)
	501	return self.url_result(new_url)
	502
	503	# Check for direct link to a video
	504	content_type = response.headers.get('Content-Type', '')
	505	m = re.match(r'^(?P<type>audio\|video\|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
	506	if m:
	507	upload_date = response.headers.get('Last-Modified')
	508	if upload_date:
	509	upload_date = unified_strdate(upload_date)
	510	return {
	511	'id': video_id,
	512	'title': os.path.splitext(url_basename(url))[0],
	513	'formats': [{
	514	'format_id': m.group('format_id'),
	515	'url': url,
	516	'vcodec': 'none' if m.group('type') == 'audio' else None
	517	}],
	518	'upload_date': upload_date,
	519	}
	520
	521	except compat_urllib_error.HTTPError:
	522	# This may be a stupid server that doesn't like HEAD, our UA, or so
	523	pass
	524
	525	try:
	526	webpage = self._download_webpage(url, video_id)
	527	except ValueError:
	528	# since this is the last-resort InfoExtractor, if
	529	# this error is thrown, it'll be thrown here
	530	raise ExtractorError('Failed to download URL: %s' % url)
	531
	532	self.report_extraction(video_id)
	533
	534	# Is it an RSS feed?
	535	try:
	536	doc = parse_xml(webpage)
	537	if doc.tag == 'rss':
	538	return self._extract_rss(url, video_id, doc)
	539	except compat_xml_parse_error:
	540	pass
	541
	542	# Is it a Camtasia project?
	543	camtasia_res = self._extract_camtasia(url, video_id, webpage)
	544	if camtasia_res is not None:
	545	return camtasia_res
	546
	547	# Sometimes embedded video player is hidden behind percent encoding
	548	# (e.g. https://github.com/rg3/youtube-dl/issues/2448)
	549	# Unescaping the whole page allows to handle those cases in a generic way
	550	webpage = compat_urllib_parse.unquote(webpage)
	551
	552	# it's tempting to parse this further, but you would
	553	# have to take into account all the variations like
	554	# Video Title - Site Name
	555	# Site Name \| Video Title
	556	# Video Title - Tagline \| Site Name
	557	# and so on and so forth; it's just not practical
	558	video_title = self._html_search_regex(
	559	r'(?s)<title>(.*?)</title>', webpage, 'video title',
	560	default='video')
	561
	562	# video uploader is domain name
	563	video_uploader = self._search_regex(
	564	r'^(?:https?://)?([^/])/.', url, 'video uploader')
	565
	566	# Helper method
	567	def _playlist_from_matches(matches, getter, ie=None):
	568	urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
	569	return self.playlist_result(
	570	urlrs, playlist_id=video_id, playlist_title=video_title)
	571
	572	# Look for BrightCove:
	573	bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
	574	if bc_urls:
	575	self.to_screen('Brightcove video detected.')
	576	entries = [{
	577	'_type': 'url',
	578	'url': smuggle_url(bc_url, {'Referer': url}),
	579	'ie_key': 'Brightcove'
	580	} for bc_url in bc_urls]
	581
	582	return {
	583	'_type': 'playlist',
	584	'title': video_title,
	585	'id': video_id,
	586	'entries': entries,
	587	}
	588
	589	# Look for embedded (iframe) Vimeo player
	590	mobj = re.search(
	591	r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
	592	if mobj:
	593	player_url = unescapeHTML(mobj.group('url'))
	594	surl = smuggle_url(player_url, {'Referer': url})
	595	return self.url_result(surl, 'Vimeo')
	596
	597	# Look for embedded (swf embed) Vimeo player
	598	mobj = re.search(
	599	r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
	600	if mobj:
	601	return self.url_result(mobj.group(1), 'Vimeo')
	602
	603	# Look for embedded YouTube player
	604	matches = re.findall(r'''(?x)
	605	(?:
	606	<iframe[^>]+?src=\|
	607	data-video-url=\|
	608	<embed[^>]+?src=\|
	609	embedSWF\(?:\s*
	610	)
	611	(["\'])
	612	(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
	613	(?:embed\|v)/.+?)
	614	\1''', webpage)
	615	if matches:
	616	return _playlist_from_matches(
	617	matches, lambda m: unescapeHTML(m[1]), ie='Youtube')
	618
	619	# Look for embedded Dailymotion player
	620	matches = re.findall(
	621	r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
	622	if matches:
	623	return _playlist_from_matches(
	624	matches, lambda m: unescapeHTML(m[1]))
	625
	626	# Look for embedded Wistia player
	627	match = re.search(
	628	r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
	629	if match:
	630	return {
	631	'_type': 'url_transparent',
	632	'url': unescapeHTML(match.group('url')),
	633	'ie_key': 'Wistia',
	634	'uploader': video_uploader,
	635	'title': video_title,
	636	'id': video_id,
	637	}
	638
	639	# Look for embedded blip.tv player
	640	mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
	641	if mobj:
	642	return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
	643	mobj = re.search(r'<(?:iframe\|embed\|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/\|api\.swf#)[a-zA-Z0-9_]+)', webpage)
	644	if mobj:
	645	return self.url_result(mobj.group(1), 'BlipTV')
	646
	647	# Look for embedded condenast player
	648	matches = re.findall(
	649	r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
	650	webpage)
	651	if matches:
	652	return {
	653	'_type': 'playlist',
	654	'entries': [{
	655	'_type': 'url',
	656	'ie_key': 'CondeNast',
	657	'url': ma,
	658	} for ma in matches],
	659	'title': video_title,
	660	'id': video_id,
	661	}
	662
	663	# Look for Bandcamp pages with custom domain
	664	mobj = re.search(r'<meta property="og:url"[^>]?content="(.?bandcamp\.com.*?)"', webpage)
	665	if mobj is not None:
	666	burl = unescapeHTML(mobj.group(1))
	667	# Don't set the extractor because it can be a track url or an album
	668	return self.url_result(burl)
	669
	670	# Look for embedded Vevo player
	671	mobj = re.search(
	672	r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
	673	if mobj is not None:
	674	return self.url_result(mobj.group('url'))
	675
	676	# Look for Ooyala videos
	677	mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode\|ec)=(?P<ec>[^"&]+)', webpage) or
	678	re.search(r'OO.Player.create\([\'"].?[\'"],\s[\'"](?P<ec>.{32})[\'"]', webpage))
	679	if mobj is not None:
	680	return OoyalaIE._build_url_result(mobj.group('ec'))
	681
	682	# Look for Aparat videos
	683	mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
	684	if mobj is not None:
	685	return self.url_result(mobj.group(1), 'Aparat')
	686
	687	# Look for MPORA videos
	688	mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com\|de)/videos/[^"]+)"', webpage)
	689	if mobj is not None:
	690	return self.url_result(mobj.group(1), 'Mpora')
	691
	692	# Look for embedded NovaMov-based player
	693	mobj = re.search(
	694	r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
	695	(?P<url>http://(?:(?:embed\|www)\.)?
	696	(?:novamov\.com\|
	697	nowvideo\.(?:ch\|sx\|eu\|at\|ag\|co)\|
	698	videoweed\.(?:es\|com)\|
	699	movshare\.(?:net\|sx\|ag)\|
	700	divxstage\.(?:eu\|net\|ch\|co\|at\|ag))
	701	/embed\.php.+?)\1''', webpage)
	702	if mobj is not None:
	703	return self.url_result(mobj.group('url'))
	704
	705	# Look for embedded Facebook player
	706	mobj = re.search(
	707	r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
	708	if mobj is not None:
	709	return self.url_result(mobj.group('url'), 'Facebook')
	710
	711	# Look for embedded VK player
	712	mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
	713	if mobj is not None:
	714	return self.url_result(mobj.group('url'), 'VK')
	715
	716	# Look for embedded ivi player
	717	mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
	718	if mobj is not None:
	719	return self.url_result(mobj.group('url'), 'Ivi')
	720
	721	# Look for embedded Huffington Post player
	722	mobj = re.search(
	723	r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
	724	if mobj is not None:
	725	return self.url_result(mobj.group('url'), 'HuffPost')
	726
	727	# Look for embed.ly
	728	mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
	729	if mobj is not None:
	730	return self.url_result(mobj.group('url'))
	731	mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
	732	if mobj is not None:
	733	return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
	734
	735	# Look for funnyordie embed
	736	matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
	737	if matches:
	738	return _playlist_from_matches(
	739	matches, getter=unescapeHTML, ie='FunnyOrDie')
	740
	741	# Look for embedded RUTV player
	742	rutv_url = RUTVIE._extract_url(webpage)
	743	if rutv_url:
	744	return self.url_result(rutv_url, 'RUTV')
	745
	746	# Look for embedded TED player
	747	mobj = re.search(
	748	r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
	749	if mobj is not None:
	750	return self.url_result(mobj.group('url'), 'TED')
	751
	752	# Look for embedded Ustream videos
	753	mobj = re.search(
	754	r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
	755	if mobj is not None:
	756	return self.url_result(mobj.group('url'), 'Ustream')
	757
	758	# Look for embedded arte.tv player
	759	mobj = re.search(
	760	r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
	761	webpage)
	762	if mobj is not None:
	763	return self.url_result(mobj.group('url'), 'ArteTVEmbed')
	764
	765	# Look for embedded smotri.com player
	766	smotri_url = SmotriIE._extract_url(webpage)
	767	if smotri_url:
	768	return self.url_result(smotri_url, 'Smotri')
	769
	770	# Look for embeded soundcloud player
	771	mobj = re.search(
	772	r'<iframe src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
	773	webpage)
	774	if mobj is not None:
	775	url = unescapeHTML(mobj.group('url'))
	776	return self.url_result(url)
	777
	778	# Look for embedded vulture.com player
	779	mobj = re.search(
	780	r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
	781	webpage)
	782	if mobj is not None:
	783	url = unescapeHTML(mobj.group('url'))
	784	return self.url_result(url, ie='Vulture')
	785
	786	# Look for embedded mtvservices player
	787	mobj = re.search(
	788	r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
	789	webpage)
	790	if mobj is not None:
	791	url = unescapeHTML(mobj.group('url'))
	792	return self.url_result(url, ie='MTVServicesEmbedded')
	793
	794	# Look for embedded yahoo player
	795	mobj = re.search(
	796	r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen\|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
	797	webpage)
	798	if mobj is not None:
	799	return self.url_result(mobj.group('url'), 'Yahoo')
	800
	801	# Look for embedded sbs.com.au player
	802	mobj = re.search(
	803	r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
	804	webpage)
	805	if mobj is not None:
	806	return self.url_result(mobj.group('url'), 'SBS')
	807
	808	# Start with something easy: JW Player in SWFObject
	809	found = re.findall(r'flashvars: [\'"](?:.&)?file=(http[^\'"&])', webpage)
	810	if not found:
	811	# Look for gorilla-vid style embedding
	812	found = re.findall(r'''(?sx)
	813	(?:
	814	jw_plugins\|
	815	JWPlayerOptions\|
	816	jwplayer\s$\s["'][^'"]+["']\s$\s\.setup
	817	)
	818	.?file\s:\s["\'](.?)["\']''', webpage)
	819	if not found:
	820	# Broaden the search a little bit
	821	found = re.findall(r'[^A-Za-z0-9]?(?:file\|source)=(http[^\'"&]*)', webpage)
	822	if not found:
	823	# Broaden the findall a little bit: JWPlayer JS loader
	824	found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
	825	if not found:
	826	# Try to find twitter cards info
	827	found = re.findall(r'<meta (?:property\|name)="twitter:player:stream" (?:content\|value)="(.+?)"', webpage)
	828	if not found:
	829	# We look for Open Graph info:
	830	# We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
	831	m_video_type = re.findall(r'<meta.?property="og:video:type".?content="video/(.*?)"', webpage)
	832	# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
	833	if m_video_type is not None:
	834	def check_video(vurl):
	835	vpath = compat_urlparse.urlparse(vurl).path
	836	return '.' in vpath and not vpath.endswith('.swf')
	837	found = list(filter(
	838	check_video,
	839	re.findall(r'<meta.?property="og:video".?content="(.*?)"', webpage)))
	840	if not found:
	841	# HTML5 video
	842	found = re.findall(r'(?s)<video[^<](?:>.?<source.*?)? src="([^"]+)"', webpage)
	843	if not found:
	844	found = re.search(
	845	r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
	846	r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
	847	webpage)
	848	if found:
	849	new_url = found.group(1)
	850	self.report_following_redirect(new_url)
	851	return {
	852	'_type': 'url',
	853	'url': new_url,
	854	}
	855	if not found:
	856	raise ExtractorError('Unsupported URL: %s' % url)
	857
	858	entries = []
	859	for video_url in found:
	860	video_url = compat_urlparse.urljoin(url, video_url)
	861	video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
	862
	863	# Sometimes, jwplayer extraction will result in a YouTube URL
	864	if YoutubeIE.suitable(video_url):
	865	entries.append(self.url_result(video_url, 'Youtube'))
	866	continue
	867
	868	# here's a fun little line of code for you:
	869	video_id = os.path.splitext(video_id)[0]
	870
	871	entries.append({
	872	'id': video_id,
	873	'url': video_url,
	874	'uploader': video_uploader,
	875	'title': video_title,
	876	})
	877
	878	if len(entries) == 1:
	879	return entries[0]
	880	else:
	881	for num, e in enumerate(entries, start=1):
	882	e['title'] = '%s (%d)' % (e['title'], num)
	883	return {
	884	'_type': 'playlist',
	885	'entries': entries,
	886	}
	887