]> jfr.im git - yt-dlp.git/blame_incremental - youtube_dl/extractor/youtube.py
release 2014.03.03
[yt-dlp.git] / youtube_dl / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3import collections
4import errno
5import io
6import itertools
7import json
8import os.path
9import re
10import string
11import struct
12import traceback
13import zlib
14
15from .common import InfoExtractor, SearchInfoExtractor
16from .subtitles import SubtitlesInfoExtractor
17from ..utils import (
18 compat_chr,
19 compat_parse_qs,
20 compat_urllib_parse,
21 compat_urllib_request,
22 compat_urlparse,
23 compat_str,
24
25 clean_html,
26 get_cachedir,
27 get_element_by_id,
28 get_element_by_attribute,
29 ExtractorError,
30 int_or_none,
31 PagedList,
32 unescapeHTML,
33 unified_strdate,
34 orderedSet,
35 write_json_file,
36 uppercase_escape,
37)
38
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Switch the site UI to English; return True on success."""
        return bool(self._download_webpage(
            self._LANG_URL, None,
            note=u'Setting language', errnote='unable to set language',
            fatal=False))

    def _login(self):
        """Log in to YouTube with the configured credentials.

        Returns True on success and False on any failure (including when
        no login info is configured and login is optional).  Raises
        ExtractorError when _LOGIN_REQUIRED is set but no credentials
        are available.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note=u'Downloading login page',
            errnote=u'unable to fetch login page', fatal=False)
        if login_page is False:
            # BUGFIX: previously a bare `return` (None); return False so
            # every failure path of _login() reports the same value.
            return False

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, u'Login GALX parameter')

        # Log in
        login_form_strs = {
            u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            u'Email': username,
            u'GALX': galx,
            u'Passwd': password,
            u'PersistentCookie': u'yes',
            u'_utf8': u'霱',
            u'bgresponse': u'js_disabled',
            u'checkConnection': u'',
            u'checkedDomains': u'youtube',
            u'dnConn': u'',
            u'pstMsg': u'0',
            u'rmShown': u'1',
            u'secTok': u'',
            u'signIn': u'Sign in',
            u'timeStmp': u'',
            u'service': u'youtube',
            u'uilel': u'3',
            u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')

        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note=u'Logging in', errnote=u'unable to log in', fatal=False)
        if login_results is False:
            return False
        # The login form being served back means the credentials were rejected.
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
            return False
        return True

    def _confirm_age(self):
        """POST the age-verification form; returns True (errors are fatal)."""
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        req = compat_urllib_request.Request(self._AGE_URL,
            compat_urllib_parse.urlencode(age_form).encode('ascii'))

        self._download_webpage(
            req, None,
            note=u'Confirming age', errnote=u'Unable to confirm age')
        return True

    def _real_initialize(self):
        # Runs once before extraction: set language, log in (both
        # best-effort; abort silently on failure), then confirm age.
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
130
131
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    IE_DESC = u'YouTube.com'
    # Verbose ((?x)) pattern: matches the many watch/embed/short URL shapes
    # (and the naked 11-character video ID); group 1 captures the video ID.
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                            (?:www\.)?deturl\.com/www\.youtube\.com/|
                            (?:www\.)?pwnyoutube\.com/|
                            (?:www\.)?yourepeat\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    # Extracts the value of a next_url query parameter from a URL.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Known format properties keyed by itag (as a string).  Entries only
    # list what is fixed for that itag; '_rtmp' is a pseudo-itag used for
    # unnamed RTMP streams.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240},
        '6': {'ext': 'flv', 'width': 450, 'height': 270},
        '13': {'ext': '3gp'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
        '34': {'ext': 'flv', 'width': 640, 'height': 360},
        '35': {'ext': 'flv', 'width': 854, 'height': 480},
        '36': {'ext': '3gp', 'width': 320, 'height': 240},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
        '43': {'ext': 'webm', 'width': 640, 'height': 360},
        '44': {'ext': 'webm', 'width': 854, 'height': 480},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080},


        # 3d videos
        '82': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': '3D', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},

        # Apple HTTP Live Streaming
        '92': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'HLS', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'HLS', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'HLS', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'HLS', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'preference': -40},
        '134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'preference': -40},
        '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40},
        '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40},
        '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
        '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'preference': -40},
        '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40},
        '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'preference': -40},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40},
        '243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40},
        '244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
        '245': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
        '246': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
        '247': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': 'DASH webm', 'preference': -40},
        '248': {'ext': 'webm', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH webm', 'preference': -40},

        # Dash webm audio
        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
        '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 256, 'preference': -50},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},
    }
233
    IE_NAME = u'youtube'
    # Sample URLs with the metadata expected from extraction; consumed by
    # the test harness (md5:... values are checksums of long descriptions).
    _TESTS = [
        {
            u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file": u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file": u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:5b292926389560516e384ac437c0ec07",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file": u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
        {
            u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
            u"file": u"yZIXLfi8CZQ.mp4",
            u"note": u"Embed-only video (#1746)",
            u"info_dict": {
                u"upload_date": u"20120608",
                u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
                u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
                u"uploader": u"SET India",
                u"uploader_id": u"setindia"
            }
        },
        {
            u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
            u"file": u"a9LDPn-MO4I.m4a",
            u"note": u"256k DASH audio (format 141) via DASH manifest",
            u"info_dict": {
                u"upload_date": "20121002",
                u"uploader_id": "8KVIDEO",
                u"description": "No description available.",
                u"uploader": "8KVIDEO",
                u"title": "UHDTV TEST 8K VIDEO.mp4"
            },
            u"params": {
                u"youtube_include_dash_manifest": True,
                u"format": "141",
            },
        },
        # DASH manifest with encrypted signature
        {
            u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA',
            u'info_dict': {
                u'id': u'IB3lcPjvWLA',
                u'ext': u'm4a',
                u'title': u'Afrojack - The Spark ft. Spree Wilson',
                u'description': u'md5:3199ed45ee8836572865580804d7ac0f',
                u'uploader': u'AfrojackVEVO',
                u'uploader_id': u'AfrojackVEVO',
                u'upload_date': u'20131011',
            },
            u"params": {
                u'youtube_include_dash_manifest': True,
                u'format': '141',
            },
        },
    ]
317
318
319 @classmethod
320 def suitable(cls, url):
321 """Receives a URL and returns True if suitable for this IE."""
322 if YoutubePlaylistIE.suitable(url): return False
323 return re.match(cls._VALID_URL, url) is not None
324
325 def __init__(self, *args, **kwargs):
326 super(YoutubeIE, self).__init__(*args, **kwargs)
327 self._player_cache = {}
328
329 def report_video_info_webpage_download(self, video_id):
330 """Report attempt to download video info webpage."""
331 self.to_screen(u'%s: Downloading video info webpage' % video_id)
332
333 def report_information_extraction(self, video_id):
334 """Report attempt to extract video information."""
335 self.to_screen(u'%s: Extracting video information' % video_id)
336
337 def report_unavailable_format(self, video_id, format):
338 """Report extracted video URL."""
339 self.to_screen(u'%s: Format %s not available' % (video_id, format))
340
341 def report_rtmp_download(self):
342 """Indicate the download will use the RTMP protocol."""
343 self.to_screen(u'RTMP download detected')
344
    def _extract_signature_function(self, video_id, player_url, slen):
        """Build the decipher function for a player and signature length.

        The compiled permutation is cached on disk (as a list of source
        indices) so subsequent runs can skip downloading the player.
        """
        # Player id/type are encoded in the player URL, e.g. ...-<id>.<ext>
        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                        player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%d' % (player_type, player_id, slen)
        assert os.path.basename(func_id) == func_id
        cache_dir = get_cachedir(self._downloader.params)

        cache_enabled = cache_dir is not None
        if cache_enabled:
            cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                    u'youtube-sigfuncs',
                                    func_id + '.json')
            try:
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    cache_spec = json.load(cachef)
                # The cached spec is just a list of indices into s.
                return lambda s: u''.join(s[i] for i in cache_spec)
            except IOError:
                pass  # No cache available

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        if cache_enabled:
            # Derive the index permutation by running the function on a
            # probe string of distinct characters, then persist it; cache
            # write failures only produce a warning.
            try:
                test_string = u''.join(map(compat_chr, range(slen)))
                cache_res = res(test_string)
                cache_spec = [ord(c) for c in cache_res]
                try:
                    os.makedirs(os.path.dirname(cache_fn))
                except OSError as ose:
                    if ose.errno != errno.EEXIST:
                        raise
                write_json_file(cache_spec, cache_fn)
            except Exception:
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Writing cache to %r failed: %s' % (cache_fn, tb))

        return res
401
    def _print_sig_code(self, func, slen):
        """Print Python source equivalent to the decipher function *func*.

        Runs func on a probe string, recovers the index permutation and
        compresses consecutive runs into slice expressions.
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render s[start:end+step:step] with the default parts omitted.
                starts = u'' if start == 0 else str(start)
                ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
                steps = u'' if step == 1 else (u':%d' % step)
                return u's[%s%s%s]' % (starts, ends, steps)

            step = None
            start = '(Never used)'  # Quelch pyflakes warnings - start will be
                                    # set as soon as step is set
            for i, prev in zip(idxs[1:], idxs[:-1]):
                # Greedily collect runs with step +1/-1 into slices;
                # isolated indices are emitted as single s[n] terms.
                if step is not None:
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield u's[%d]' % prev
            # Flush the final element or the still-open run.
            if step is None:
                yield u's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = u''.join(map(compat_chr, range(slen)))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = u' + '.join(gen_sig_code(cache_spec))
        code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
        self.to_screen(u'Extracted signature function:\n' + code)
437
438 def _parse_sig_js(self, jscode):
439 funcname = self._search_regex(
440 r'signature=([a-zA-Z]+)', jscode,
441 u'Initial JS player signature function name')
442
443 functions = {}
444
445 def argidx(varname):
446 return string.lowercase.index(varname)
447
448 def interpret_statement(stmt, local_vars, allow_recursion=20):
449 if allow_recursion < 0:
450 raise ExtractorError(u'Recursion limit reached')
451
452 if stmt.startswith(u'var '):
453 stmt = stmt[len(u'var '):]
454 ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
455 r'=(?P<expr>.*)$', stmt)
456 if ass_m:
457 if ass_m.groupdict().get('index'):
458 def assign(val):
459 lvar = local_vars[ass_m.group('out')]
460 idx = interpret_expression(ass_m.group('index'),
461 local_vars, allow_recursion)
462 assert isinstance(idx, int)
463 lvar[idx] = val
464 return val
465 expr = ass_m.group('expr')
466 else:
467 def assign(val):
468 local_vars[ass_m.group('out')] = val
469 return val
470 expr = ass_m.group('expr')
471 elif stmt.startswith(u'return '):
472 assign = lambda v: v
473 expr = stmt[len(u'return '):]
474 else:
475 raise ExtractorError(
476 u'Cannot determine left side of statement in %r' % stmt)
477
478 v = interpret_expression(expr, local_vars, allow_recursion)
479 return assign(v)
480
481 def interpret_expression(expr, local_vars, allow_recursion):
482 if expr.isdigit():
483 return int(expr)
484
485 if expr.isalpha():
486 return local_vars[expr]
487
488 m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
489 if m:
490 member = m.group('member')
491 val = local_vars[m.group('in')]
492 if member == 'split("")':
493 return list(val)
494 if member == 'join("")':
495 return u''.join(val)
496 if member == 'length':
497 return len(val)
498 if member == 'reverse()':
499 return val[::-1]
500 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
501 if slice_m:
502 idx = interpret_expression(
503 slice_m.group('idx'), local_vars, allow_recursion-1)
504 return val[idx:]
505
506 m = re.match(
507 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
508 if m:
509 val = local_vars[m.group('in')]
510 idx = interpret_expression(m.group('idx'), local_vars,
511 allow_recursion-1)
512 return val[idx]
513
514 m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
515 if m:
516 a = interpret_expression(m.group('a'),
517 local_vars, allow_recursion)
518 b = interpret_expression(m.group('b'),
519 local_vars, allow_recursion)
520 return a % b
521
522 m = re.match(
523 r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
524 if m:
525 fname = m.group('func')
526 if fname not in functions:
527 functions[fname] = extract_function(fname)
528 argvals = [int(v) if v.isdigit() else local_vars[v]
529 for v in m.group('args').split(',')]
530 return functions[fname](argvals)
531 raise ExtractorError(u'Unsupported JS expression %r' % expr)
532
533 def extract_function(funcname):
534 func_m = re.search(
535 r'function ' + re.escape(funcname) +
536 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
537 jscode)
538 argnames = func_m.group('args').split(',')
539
540 def resf(args):
541 local_vars = dict(zip(argnames, args))
542 for stmt in func_m.group('code').split(';'):
543 res = interpret_statement(stmt, local_vars)
544 return res
545 return resf
546
547 initial_function = extract_function(funcname)
548 return lambda s: initial_function([s])
549
    def _parse_sig_swf(self, file_contents):
        """Build the decipher function from the Flash (SWF) player.

        Decompresses the SWF, locates the DoABC tag (code 82), parses the
        ABC (AVM2 bytecode) constant pool, method and class tables to find
        the SignatureDecipher class, interprets its method bodies with a
        tiny stack-machine, and returns a function mapping an encrypted
        signature string to the deciphered one.
        """
        if file_contents[1:3] != b'WS':
            raise ExtractorError(
                u'Not an SWF file; header is %r' % file_contents[:3])
        if file_contents[:1] == b'C':
            # 'CWS': body after the 8-byte header is zlib-compressed
            content = zlib.decompress(file_contents[8:])
        else:
            raise NotImplementedError(u'Unsupported compression format %r' %
                                      file_contents[:1])

        def extract_tags(content):
            # Yield (tag_code, tag_body) for every SWF tag in the body;
            # a 6-bit length of 0x3f means an extended 32-bit length follows.
            pos = 0
            while pos < len(content):
                header16 = struct.unpack('<H', content[pos:pos+2])[0]
                pos += 2
                tag_code = header16 >> 6
                tag_len = header16 & 0x3f
                if tag_len == 0x3f:
                    tag_len = struct.unpack('<I', content[pos:pos+4])[0]
                    pos += 4
                assert pos+tag_len <= len(content)
                yield (tag_code, content[pos:pos+tag_len])
                pos += tag_len

        code_tag = next(tag
                        for tag_code, tag in extract_tags(content)
                        if tag_code == 82)
        # Skip flags and the NUL-terminated tag name before the ABC data.
        p = code_tag.index(b'\0', 4) + 1
        code_reader = io.BytesIO(code_tag[p:])

        # Parse ABC (AVM2 ByteCode)
        def read_int(reader=None):
            # Variable-length u32: 7 bits per byte, high bit = continuation.
            if reader is None:
                reader = code_reader
            res = 0
            shift = 0
            for _ in range(5):
                buf = reader.read(1)
                assert len(buf) == 1
                b = struct.unpack('<B', buf)[0]
                res = res | ((b & 0x7f) << shift)
                if b & 0x80 == 0:
                    break
                shift += 7
            return res

        def u30(reader=None):
            res = read_int(reader)
            assert res & 0xf0000000 == 0
            return res
        u32 = read_int

        def s32(reader=None):
            # Same wire format as u32, reinterpreted as two's complement.
            v = read_int(reader)
            if v & 0x80000000 != 0:
                v = - ((v ^ 0xffffffff) + 1)
            return v

        def read_string(reader=None):
            if reader is None:
                reader = code_reader
            slen = u30(reader)
            resb = reader.read(slen)
            assert len(resb) == slen
            return resb.decode('utf-8')

        def read_bytes(count, reader=None):
            if reader is None:
                reader = code_reader
            resb = reader.read(count)
            assert len(resb) == count
            return resb

        def read_byte(reader=None):
            resb = read_bytes(1, reader=reader)
            res = struct.unpack('<B', resb)[0]
            return res

        # minor_version + major_version
        read_bytes(2 + 2)

        # Constant pool
        # Index 0 of each pool is implicit, hence the range(1, count) loops;
        # only the string pool is retained, the rest is skipped.
        int_count = u30()
        for _c in range(1, int_count):
            s32()
        uint_count = u30()
        for _c in range(1, uint_count):
            u32()
        double_count = u30()
        read_bytes((double_count-1) * 8)
        string_count = u30()
        constant_strings = [u'']
        for _c in range(1, string_count):
            s = read_string()
            constant_strings.append(s)
        namespace_count = u30()
        for _c in range(1, namespace_count):
            read_bytes(1)  # kind
            u30()  # name
        ns_set_count = u30()
        for _c in range(1, ns_set_count):
            count = u30()
            for _c2 in range(count):
                u30()
        multiname_count = u30()
        # Number of u30 fields following each multiname kind byte.
        MULTINAME_SIZES = {
            0x07: 2,  # QName
            0x0d: 2,  # QNameA
            0x0f: 1,  # RTQName
            0x10: 1,  # RTQNameA
            0x11: 0,  # RTQNameL
            0x12: 0,  # RTQNameLA
            0x09: 2,  # Multiname
            0x0e: 2,  # MultinameA
            0x1b: 1,  # MultinameL
            0x1c: 1,  # MultinameLA
        }
        multinames = [u'']
        for _c in range(1, multiname_count):
            kind = u30()
            assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
            if kind == 0x07:
                u30()  # namespace_idx
                name_idx = u30()
                multinames.append(constant_strings[name_idx])
            else:
                multinames.append('[MULTINAME kind: %d]' % kind)
                for _c2 in range(MULTINAME_SIZES[kind]):
                    u30()

        # Methods
        method_count = u30()
        MethodInfo = collections.namedtuple(
            'MethodInfo',
            ['NEED_ARGUMENTS', 'NEED_REST'])
        method_infos = []
        for method_id in range(method_count):
            param_count = u30()
            u30()  # return type
            for _ in range(param_count):
                u30()  # param type
            u30()  # name index (always 0 for youtube)
            flags = read_byte()
            if flags & 0x08 != 0:
                # Options present
                option_count = u30()
                for c in range(option_count):
                    u30()  # val
                    read_bytes(1)  # kind
            if flags & 0x80 != 0:
                # Param names present
                for _ in range(param_count):
                    u30()  # param name
            mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
            method_infos.append(mi)

        # Metadata
        metadata_count = u30()
        for _c in range(metadata_count):
            u30()  # name
            item_count = u30()
            for _c2 in range(item_count):
                u30()  # key
                u30()  # value

        def parse_traits_info():
            # Parse one trait record; returns a dict mapping method names
            # to method indices for Method/Getter/Setter traits (and the
            # reverse for Function traits), empty for other kinds.
            trait_name_idx = u30()
            kind_full = read_byte()
            kind = kind_full & 0x0f
            attrs = kind_full >> 4
            methods = {}
            if kind in [0x00, 0x06]:  # Slot or Const
                u30()  # Slot id
                u30()  # type_name_idx
                vindex = u30()
                if vindex != 0:
                    read_byte()  # vkind
            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
                u30()  # disp_id
                method_idx = u30()
                methods[multinames[trait_name_idx]] = method_idx
            elif kind == 0x04:  # Class
                u30()  # slot_id
                u30()  # classi
            elif kind == 0x05:  # Function
                u30()  # slot_id
                function_idx = u30()
                methods[function_idx] = multinames[trait_name_idx]
            else:
                raise ExtractorError(u'Unsupported trait kind %d' % kind)

            if attrs & 0x4 != 0:  # Metadata present
                metadata_count = u30()
                for _c3 in range(metadata_count):
                    u30()  # metadata index

            return methods

        # Classes
        TARGET_CLASSNAME = u'SignatureDecipher'
        searched_idx = multinames.index(TARGET_CLASSNAME)
        searched_class_id = None
        class_count = u30()
        for class_id in range(class_count):
            name_idx = u30()
            if name_idx == searched_idx:
                # We found the class we're looking for!
                searched_class_id = class_id
            u30()  # super_name idx
            flags = read_byte()
            if flags & 0x08 != 0:  # Protected namespace is present
                u30()  # protected_ns_idx
            intrf_count = u30()
            for _c2 in range(intrf_count):
                u30()
            u30()  # iinit
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        if searched_class_id is None:
            raise ExtractorError(u'Target class %r not found' %
                                 TARGET_CLASSNAME)

        # Second pass over the class table: collect the target class's
        # method name <-> index mappings from its static traits.
        method_names = {}
        method_idxs = {}
        for class_id in range(class_count):
            u30()  # cinit
            trait_count = u30()
            for _c2 in range(trait_count):
                trait_methods = parse_traits_info()
                if class_id == searched_class_id:
                    method_names.update(trait_methods.items())
                    method_idxs.update(dict(
                        (idx, name)
                        for name, idx in trait_methods.items()))

        # Scripts
        script_count = u30()
        for _c in range(script_count):
            u30()  # init
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        # Method bodies
        method_body_count = u30()
        Method = collections.namedtuple('Method', ['code', 'local_count'])
        methods = {}
        for _c in range(method_body_count):
            method_idx = u30()
            u30()  # max_stack
            local_count = u30()
            u30()  # init_scope_depth
            u30()  # max_scope_depth
            code_length = u30()
            code = read_bytes(code_length)
            if method_idx in method_idxs:
                m = Method(code, local_count)
                methods[method_idxs[method_idx]] = m
            exception_count = u30()
            for _c2 in range(exception_count):
                u30()  # from
                u30()  # to
                u30()  # target
                u30()  # exc_type
                u30()  # var_name
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        assert p + code_reader.tell() == len(code_tag)
        assert len(methods) == len(method_idxs)

        method_pyfunctions = {}

        def extract_function(func_name):
            # Return (and memoize) a Python callable interpreting the AVM2
            # bytecode of the named method; args is the argument-value list.
            if func_name in method_pyfunctions:
                return method_pyfunctions[func_name]
            if func_name not in methods:
                raise ExtractorError(u'Cannot find function %r' % func_name)
            m = methods[func_name]

            def resfunc(args):
                # Stack machine over the subset of opcodes the player uses.
                registers = ['(this)'] + list(args) + [None] * m.local_count
                stack = []
                coder = io.BytesIO(m.code)
                while True:
                    opcode = struct.unpack('!B', coder.read(1))[0]
                    if opcode == 36:  # pushbyte
                        v = struct.unpack('!B', coder.read(1))[0]
                        stack.append(v)
                    elif opcode == 44:  # pushstring
                        idx = u30(coder)
                        stack.append(constant_strings[idx])
                    elif opcode == 48:  # pushscope
                        # We don't implement the scope register, so we'll just
                        # ignore the popped value
                        stack.pop()
                    elif opcode == 70:  # callproperty
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'split':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, compat_str)
                            if args[0] == u'':
                                res = list(obj)
                            else:
                                res = obj.split(args[0])
                            stack.append(res)
                        elif mname == u'slice':
                            assert len(args) == 1
                            assert isinstance(args[0], int)
                            assert isinstance(obj, list)
                            res = obj[args[0]:]
                            stack.append(res)
                        elif mname == u'join':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, list)
                            res = args[0].join(obj)
                            stack.append(res)
                        elif mname in method_pyfunctions:
                            stack.append(method_pyfunctions[mname](args))
                        else:
                            raise NotImplementedError(
                                u'Unsupported property %r on %r'
                                % (mname, obj))
                    elif opcode == 72:  # returnvalue
                        res = stack.pop()
                        return res
                    elif opcode == 79:  # callpropvoid
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'reverse':
                            assert isinstance(obj, list)
                            obj.reverse()
                        else:
                            raise NotImplementedError(
                                u'Unsupported (void) property %r on %r'
                                % (mname, obj))
                    elif opcode == 93:  # findpropstrict
                        index = u30(coder)
                        mname = multinames[index]
                        res = extract_function(mname)
                        stack.append(res)
                    elif opcode == 97:  # setproperty
                        index = u30(coder)
                        value = stack.pop()
                        idx = stack.pop()
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        assert isinstance(idx, int)
                        obj[idx] = value
                    elif opcode == 98:  # getlocal
                        index = u30(coder)
                        stack.append(registers[index])
                    elif opcode == 99:  # setlocal
                        index = u30(coder)
                        value = stack.pop()
                        registers[index] = value
                    elif opcode == 102:  # getproperty
                        index = u30(coder)
                        pname = multinames[index]
                        if pname == u'length':
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(len(obj))
                        else:  # Assume attribute access
                            idx = stack.pop()
                            assert isinstance(idx, int)
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(obj[idx])
                    elif opcode == 128:  # coerce
                        u30(coder)
                    elif opcode == 133:  # coerce_s
                        assert isinstance(stack[-1], (type(None), compat_str))
                    elif opcode == 164:  # modulo
                        value2 = stack.pop()
                        value1 = stack.pop()
                        res = value1 % value2
                        stack.append(res)
                    elif opcode == 208:  # getlocal_0
                        stack.append(registers[0])
                    elif opcode == 209:  # getlocal_1
                        stack.append(registers[1])
                    elif opcode == 210:  # getlocal_2
                        stack.append(registers[2])
                    elif opcode == 211:  # getlocal_3
                        stack.append(registers[3])
                    elif opcode == 214:  # setlocal_2
                        registers[2] = stack.pop()
                    elif opcode == 215:  # setlocal_3
                        registers[3] = stack.pop()
                    else:
                        raise NotImplementedError(
                            u'Unsupported opcode %d' % opcode)

            method_pyfunctions[func_name] = resfunc
            return resfunc

        initial_function = extract_function(u'decipher')
        return lambda s: initial_function([s])
963
964 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
965 """Turn the encrypted s field into a working signature"""
966
967 if player_url is not None:
968 if player_url.startswith(u'//'):
969 player_url = u'https:' + player_url
970 try:
971 player_id = (player_url, len(s))
972 if player_id not in self._player_cache:
973 func = self._extract_signature_function(
974 video_id, player_url, len(s)
975 )
976 self._player_cache[player_id] = func
977 func = self._player_cache[player_id]
978 if self._downloader.params.get('youtube_print_sig_code'):
979 self._print_sig_code(func, len(s))
980 return func(s)
981 except Exception:
982 tb = traceback.format_exc()
983 self._downloader.report_warning(
984 u'Automatic signature extraction failed: ' + tb)
985
986 self._downloader.report_warning(
987 u'Warning: Falling back to static signature algorithm')
988
989 return self._static_decrypt_signature(
990 s, video_id, player_url, age_gate)
991
992 def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
993 if age_gate:
994 # The videos with age protection use another player, so the
995 # algorithms can be different.
996 if len(s) == 86:
997 return s[2:63] + s[82] + s[64:82] + s[63]
998
999 if len(s) == 93:
1000 return s[86:29:-1] + s[88] + s[28:5:-1]
1001 elif len(s) == 92:
1002 return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1003 elif len(s) == 91:
1004 return s[84:27:-1] + s[86] + s[26:5:-1]
1005 elif len(s) == 90:
1006 return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1007 elif len(s) == 89:
1008 return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1009 elif len(s) == 88:
1010 return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1011 elif len(s) == 87:
1012 return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1013 elif len(s) == 86:
1014 return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1015 elif len(s) == 85:
1016 return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1017 elif len(s) == 84:
1018 return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1019 elif len(s) == 83:
1020 return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1021 elif len(s) == 82:
1022 return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
1023 elif len(s) == 81:
1024 return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1025 elif len(s) == 80:
1026 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1027 elif len(s) == 79:
1028 return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1029
1030 else:
1031 raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1032
1033 def _get_available_subtitles(self, video_id, webpage):
1034 try:
1035 sub_list = self._download_webpage(
1036 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1037 video_id, note=False)
1038 except ExtractorError as err:
1039 self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1040 return {}
1041 lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1042
1043 sub_lang_list = {}
1044 for l in lang_list:
1045 lang = l[1]
1046 params = compat_urllib_parse.urlencode({
1047 'lang': lang,
1048 'v': video_id,
1049 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
1050 'name': unescapeHTML(l[0]).encode('utf-8'),
1051 })
1052 url = u'https://www.youtube.com/api/timedtext?' + params
1053 sub_lang_list[lang] = url
1054 if not sub_lang_list:
1055 self._downloader.report_warning(u'video doesn\'t have subtitles')
1056 return {}
1057 return sub_lang_list
1058
    def _get_available_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.

        Returns a {lang_code: caption_url} map of machine-generated (ASR)
        captions, or {} with a warning if none are available.
        """
        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        # The ttsurl/timestamp needed to build caption URLs live in the
        # inline ytplayer.config JSON blob of the watch page.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for %s' % video_id
        if mobj is None:
            self._downloader.report_warning(err_msg)
            return {}
        player_config = json.loads(mobj.group(1))
        try:
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse.urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            caption_list = self._download_xml(list_url, video_id)
            original_lang_node = caption_list.find('track')
            # Only an 'asr' (automatic speech recognition) track counts as
            # automatic captions; anything else means there are none.
            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']

            # One translated-caption URL per available target language.
            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                params = compat_urllib_parse.urlencode({
                    'lang': original_lang,
                    'tlang': sub_lang,
                    'fmt': sub_format,
                    'ts': timestamp,
                    'kind': 'asr',
                })
                sub_lang_list[sub_lang] = caption_url + '&' + params
            return sub_lang_list
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1105
1106 @classmethod
1107 def extract_id(cls, url):
1108 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1109 if mobj is None:
1110 raise ExtractorError(u'Invalid URL: %s' % url)
1111 video_id = mobj.group(2)
1112 return video_id
1113
1114 def _extract_from_m3u8(self, manifest_url, video_id):
1115 url_map = {}
1116 def _get_urls(_manifest):
1117 lines = _manifest.split('\n')
1118 urls = filter(lambda l: l and not l.startswith('#'),
1119 lines)
1120 return urls
1121 manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1122 formats_urls = _get_urls(manifest)
1123 for format_url in formats_urls:
1124 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1125 url_map[itag] = format_url
1126 return url_map
1127
1128 def _extract_annotations(self, video_id):
1129 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1130 return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
1131
    def _real_extract(self, url):
        """Extract metadata and downloadable formats for a single video.

        Resolves redirect/age-verification URLs, downloads the watch page and
        get_video_info, decrypts stream signatures when present, optionally
        parses the DASH manifest, and returns the standard info dict.
        """
        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self.extract_id(url)

        # Get video webpage
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        video_webpage = self._download_webpage(url, video_id)

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        if mobj is not None:
            # The URL is JS-escaped in the page source; strip the backslashes.
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
        else:
            player_url = None

        # Get video info
        self.report_video_info_webpage_download(video_id)
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            self.report_age_confirmation()
            age_gate = True
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            data = compat_urllib_parse.urlencode({'video_id': video_id,
                                                  'el': 'player_embedded',
                                                  'gl': 'US',
                                                  'hl': 'en',
                                                  'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                                                  'asv': 3,
                                                  'sts':'1588',
                                                  })
            video_info_url = 'https://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                    note=False,
                                    errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
        else:
            age_gate = False
            # Try several 'el' values; some videos only answer get_video_info
            # for specific ones (a usable response contains 'token').
            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                        % (video_id, el_type))
                video_info_webpage = self._download_webpage(video_info_url, video_id,
                                        note=False,
                                        errnote='unable to download video info webpage')
                video_info = compat_parse_qs(video_info_webpage)
                if 'token' in video_info:
                    break
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
            else:
                raise ExtractorError(u'"token" parameter not in video info for unknown reason')

        if 'view_count' in video_info:
            view_count = int(video_info['view_count'][0])
        else:
            view_count = None

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError(u'"rental" videos not supported')

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            raise ExtractorError(u'Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

        # uploader_id
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
        if mobj is not None:
            video_uploader_id = mobj.group(1)
        else:
            self._downloader.report_warning(u'unable to extract uploader nickname')

        # title
        if 'title' in video_info:
            video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
        else:
            self._downloader.report_warning(u'Unable to extract video title')
            video_title = u'_'

        # thumbnail image
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning(u'unable to extract video thumbnail')
            video_thumbnail = None
        else:   # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

        # upload date
        upload_date = None
        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
        if mobj is not None:
            # Normalize separators to spaces before parsing the date.
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            upload_date = unified_strdate(upload_date)

        # description
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            # Replace redirect-wrapped links with their title text.
            video_description = re.sub(r'''(?x)
                <a\s+
                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
                    title="([^"]+)"\s+
                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
                    class="yt-uix-redirect-link"\s*>
                [^<]+
                </a>
            ''', r'\1', video_description)
            video_description = clean_html(video_description)
        else:
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
            if fd_mobj:
                video_description = unescapeHTML(fd_mobj.group(1))
            else:
                video_description = u''

        def _extract_count(klass):
            # Pull an integer like "1,234" out of a span with the given class.
            count = self._search_regex(
                r'class="%s">([\d,]+)</span>' % re.escape(klass),
                video_webpage, klass, default=None)
            if count is not None:
                return int(count.replace(',', ''))
            return None
        like_count = _extract_count(u'likes-count')
        dislike_count = _extract_count(u'dislikes-count')

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, video_webpage)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, video_webpage)
            return

        if 'length_seconds' not in video_info:
            self._downloader.report_warning(u'unable to extract video duration')
            video_duration = None
        else:
            video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))

        # annotations
        video_annotations = None
        if self._downloader.params.get('writeannotations', False):
            video_annotations = self._extract_annotations(video_id)

        # Decide which formats to download
        try:
            mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
            if not mobj:
                raise ValueError('Could not find vevo ID')
            ytplayer_config = json.loads(mobj.group(1))
            args = ytplayer_config['args']
            # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
            # this signatures are encrypted
            if 'url_encoded_fmt_stream_map' not in args:
                raise ValueError(u'No stream_map present') # caught below
            re_signature = re.compile(r'[&,]s=')
            m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
            if m_s is not None:
                self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
            m_s = re_signature.search(args.get('adaptive_fmts', u''))
            if m_s is not None:
                if 'adaptive_fmts' in video_info:
                    video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
                else:
                    video_info['adaptive_fmts'] = [args['adaptive_fmts']]
        except ValueError:
            pass

        def _map_to_format_list(urlmap):
            # Turn {itag: url} into format dicts, merging known itag metadata.
            formats = []
            for itag, video_real_url in urlmap.items():
                dct = {
                    'format_id': itag,
                    'url': video_real_url,
                    'player_url': player_url,
                }
                if itag in self._formats:
                    dct.update(self._formats[itag])
                formats.append(dct)
            return formats

        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            formats = [{
                'format_id': '_rtmp',
                'protocol': 'rtmp',
                'url': video_info['conn'][0],
                'player_url': player_url,
            }]
        elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
            if 'rtmpe%3Dyes' in encoded_url_map:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            url_map = {}
            for url_data_str in encoded_url_map.split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' in url_data and 'url' in url_data:
                    url = url_data['url'][0]
                    if 'sig' in url_data:
                        # Pre-decrypted signature supplied directly.
                        url += '&signature=' + url_data['sig'][0]
                    elif 's' in url_data:
                        encrypted_sig = url_data['s'][0]
                        if self._downloader.params.get('verbose'):
                            if age_gate:
                                if player_url is None:
                                    player_version = 'unknown'
                                else:
                                    player_version = self._search_regex(
                                        r'-(.+)\.swf$', player_url,
                                        u'flash player', fatal=False)
                                player_desc = 'flash player %s' % player_version
                            else:
                                player_version = self._search_regex(
                                    r'html5player-(.+?)\.js', video_webpage,
                                    'html5 player', fatal=False)
                                player_desc = u'html5 player %s' % player_version

                            parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))

                        if not age_gate:
                            # Non-age-gated videos use the html5 player; its JS
                            # URL determines the signature function.
                            jsplayer_url_json = self._search_regex(
                                r'"assets":.+?"js":\s*("[^"]+")',
                                video_webpage, u'JS player URL')
                            player_url = json.loads(jsplayer_url_json)

                        signature = self._decrypt_signature(
                            encrypted_sig, video_id, player_url, age_gate)
                        url += '&signature=' + signature
                    if 'ratebypass' not in url:
                        url += '&ratebypass=yes'
                    url_map[url_data['itag'][0]] = url
            formats = _map_to_format_list(url_map)
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            formats = _map_to_format_list(url_map)
        else:
            raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

        # Look for the DASH manifest
        if (self._downloader.params.get('youtube_include_dash_manifest', False)):
            try:
                # The DASH manifest used needs to be the one from the original video_webpage.
                # The one found in get_video_info seems to be using different signatures.
                # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
                # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
                # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
                if age_gate:
                    dash_manifest_url = video_info.get('dashmpd')[0]
                else:
                    dash_manifest_url = ytplayer_config['args']['dashmpd']
                def decrypt_sig(mobj):
                    # Rewrite each /s/<sig> path segment with its decrypted form.
                    s = mobj.group(1)
                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
                    return '/signature/%s' % dec_s
                dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
                dash_doc = self._download_xml(
                    dash_manifest_url, video_id,
                    note=u'Downloading DASH manifest',
                    errnote=u'Could not download DASH manifest')
                for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
                    url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
                    if url_el is None:
                        continue
                    format_id = r.attrib['id']
                    video_url = url_el.text
                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
                    f = {
                        'format_id': format_id,
                        'url': video_url,
                        'width': int_or_none(r.attrib.get('width')),
                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
                        'filesize': filesize,
                    }
                    try:
                        # Merge into a non-DASH format with the same id if present.
                        existing_format = next(
                            fo for fo in formats
                            if fo['format_id'] == format_id)
                    except StopIteration:
                        f.update(self._formats.get(format_id, {}))
                        formats.append(f)
                    else:
                        existing_format.update(f)

            except (ExtractorError, KeyError) as e:
                self.report_warning(u'Skipping DASH manifest: %s' % e, video_id)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'uploader': video_uploader,
            'uploader_id': video_uploader_id,
            'upload_date': upload_date,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'subtitles': video_subtitles,
            'duration': video_duration,
            'age_limit': 18 if age_gate else 0,
            'annotations': video_annotations,
            'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'formats': formats,
        }
1453
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = u'youtube:playlist'

    def _real_initialize(self):
        self._login()

    def _ids_to_results(self, ids):
        """Wrap plain video ids into url results for the Youtube extractor."""
        return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
                for vid_id in ids]

    def _extract_mix(self, playlist_id):
        """Extract a 'mix' playlist.

        Mixes have no regular playlist page; they are generated from a single
        video (the playlist id is just 'RD' + video_id), so the entries are
        scraped from the watch page of that seed video.
        """
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (search_title('playlist-title') or
            search_title('title long-title') or search_title('title'))
        title = clean_html(title_span)
        video_re = r'''(?x)data-video-username="(.*?)".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
        matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
        # Some of the videos may have been deleted, their username field is empty
        ids = [video_id for (username, video_id) in matches if username]
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _real_extract(self, url):
        """Extract all entries of a playlist, following 'load more' pages."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                # Fixed: the message used to hard-code a 'PL' prefix, but
                # playlist_id already carries its own prefix (PL/EC/UU/FL/RD),
                # so it printed ids like 'PLPL...'.
                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)
        if playlist_id.startswith('TL'):
            raise ExtractorError(u'For downloading YouTube.com top lists, use '
                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)

        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)
        more_widget_html = content_html = page

        # Extract the video ids from the playlist pages
        ids = []

        for page_num in itertools.count(1):
            matches = re.finditer(self._VIDEO_RE, content_html)
            # We remove the duplicates and the link with index 0
            # (it's not the first video of the playlist)
            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
            ids.extend(new_ids)

            # Stop when there is no 'load more' widget left to follow.
            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        playlist_title = self._html_search_regex(
            r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)
1556
1557
class YoutubeTopListIE(YoutubePlaylistIE):
    IE_NAME = u'youtube:toplist'
    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
        u' (Example: "yttoplist:music:Top Tracks")')
    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'

    def _real_extract(self, url):
        """Resolve a yttoplist pseudo-URL to the matching top-list playlist."""
        mobj = re.match(self._VALID_URL, url)
        channel = mobj.group('chann')
        title = mobj.group('title')
        # Locate the playlist link on the channel page by its title query.
        query = compat_urllib_parse.urlencode({'title': title})
        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
        link = self._html_search_regex(
            'href="([^"]+?%s.*?)"' % re.escape(query), channel_page, u'list')
        url = compat_urlparse.urljoin('https://www.youtube.com/', link)

        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
        ids = []
        # sometimes the webpage doesn't contain the videos
        # retry until we get them
        for attempt in itertools.count(0):
            msg = u'Downloading Youtube mix'
            if attempt > 0:
                msg += ', retry #%d' % attempt
            webpage = self._download_webpage(url, title, msg)
            ids = orderedSet(re.findall(video_re, webpage))
            if ids:
                break
        return self.playlist_result(self._ids_to_results(ids), playlist_title=title)
1588
1589
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from page, deduplicated, in page order."""
        ids_in_page = []
        for video_id in re.findall(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if video_id not in ids_in_page:
                ids_in_page.append(video_id)
        return ids_in_page

    def _real_extract(self, url):
        """Collect every upload of a channel and return them as a playlist."""
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        channel_id = mobj.group(1)

        channel_page = self._download_webpage(
            'https://www.youtube.com/channel/%s/videos' % channel_id, channel_id)
        # Auto-generated channels list everything on the first page; their
        # ajax pagination pages come back empty.
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        video_ids = []
        if autogenerated:
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Page through the json-based channel_ajax endpoint until the
            # 'load more' widget disappears.
            for pagenum in itertools.count(1):
                page = self._download_json(
                    self._MORE_PAGES_URL % (pagenum, channel_id), channel_id,
                    note=u'Downloading page #%s' % pagenum,
                    transform_source=uppercase_escape)
                video_ids.extend(self.extract_videos_from_page(page['content_html']))
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
                   for video_id in video_ids]
        return self.playlist_result(entries, channel_id)
1644
1645
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    # gdata v2 API endpoints; 50 is the API's maximum page size.
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractor, the regex would is too permissive and it would match.
        # NOTE: scans module globals for every other *IE class and defers
        # to any of them that claims the URL first.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies): return False
        else: return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return all uploads of a user as a lazily-paged playlist."""
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        def download_page(pagenum):
            # gdata's start-index parameter is 1-based.
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(
                gdata_url, username,
                u'Downloading video ids from %d to %d' % (
                    start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # A page without entries signals the end of the uploads feed.
                return

            # Extract video identifiers
            entries = response['feed']['entry']
            for entry in entries:
                title = entry['title']['$t']
                video_id = entry['id']['$t'].split('/')[-1]
                yield {
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    'id': video_id,
                    'title': title,
                }
        url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)

        return self.playlist_result(url_results, playlist_title=username)
1706
1707
class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        video_ids = []
        page = 0
        limit = n

        # Fetch 50-result pages until we have enough or the API runs out.
        while 50 * page < limit:
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), 50 * page + 1)
            data_json = self._download_webpage(
                result_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (page + 1),
                errnote=u'Unable to download API page')
            data = json.loads(data_json)
            api_response = data['data']

            if 'items' not in api_response:
                raise ExtractorError(
                    u'[youtube] No video results', expected=True)

            video_ids.extend(video['id'] for video in api_response['items'])

            # The API may hold fewer total results than requested.
            limit = min(n, api_response['totalItems'])
            page += 1

        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
                  for video_id in video_ids[:n]]
        return self.playlist_result(videos, query)
1746
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Identical to YoutubeSearchIE except the gdata query is ordered by
    # publish date (newest first) and it uses the 'ytsearchdate' prefix.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = u'YouTube.com searches, newest videos first'
1752
class YoutubeShowIE(InfoExtractor):
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        """Return one playlist result per season of the show."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
        return [
            self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
            for season in m_seasons
        ]
1766
1767
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # These feeds only exist for a logged-in account.
    _LOGIN_REQUIRED = True
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # URL template with one remaining %s slot for the paging token.
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        entries = []
        paging = 0
        for page_num in itertools.count(1):
            info = json.loads(self._download_webpage(
                self._FEED_TEMPLATE % paging,
                u'%s feed' % self._FEED_NAME,
                u'Downloading page %s' % page_num))
            feed_html = info['feed_html']
            # Watch links are embedded in the returned HTML fragment;
            # orderedSet keeps first occurrence and drops duplicates.
            matches = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            for video_id in orderedSet(m.group(1) for m in matches):
                entries.append(
                    self.url_result(video_id, 'Youtube', video_id=video_id))
            if info['paging'] is None:
                break
            paging = info['paging']
        return self.playlist_result(entries, playlist_title=self._PLAYLIST_TITLE)
1810
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's subscriptions."""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1816
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's recommended videos."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1822
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's "watch later" list."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Watch-later is a per-account feed, not a system feed.
    _PERSONAL_FEED = True
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
1829
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's watch history."""
    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Use a raw string: the previous u'' literal contained '\.', an invalid
    # escape sequence (deprecated, a SyntaxWarning/error in newer Pythons),
    # and every sibling extractor already uses r'' for _VALID_URL.
    # The resulting pattern text is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    # History is a per-account feed, not a system feed.
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'
1836
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of its backing playlist;
        # extraction is delegated to the playlist extractor.
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
1847
1848
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches URLs truncated by the shell (unquoted '&') and explains the fix."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$|
        (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
    '''

    def _real_extract(self, url):
        # Never extracts anything: its only job is a helpful error message.
        message = (
            u'Did you forget to quote the URL? Remember that & is a meta '
            u'character in most shells, so you want to put the URL in quotes, '
            u'like youtube-dl '
            u'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            u' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(message, expected=True)