jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2
	3	import collections
	4	import errno
	5	import io
	6	import itertools
	7	import json
	8	import os.path
	9	import re
	10	import socket
	11	import string
	12	import struct
	13	import traceback
	14	import xml.etree.ElementTree
	15	import zlib
	16
	17	from .common import InfoExtractor, SearchInfoExtractor
	18	from .subtitles import SubtitlesInfoExtractor
	19	from ..utils import (
	20	compat_chr,
	21	compat_http_client,
	22	compat_parse_qs,
	23	compat_urllib_error,
	24	compat_urllib_parse,
	25	compat_urllib_request,
	26	compat_urlparse,
	27	compat_str,
	28
	29	clean_html,
	30	get_cachedir,
	31	get_element_by_id,
	32	ExtractorError,
	33	unescapeHTML,
	34	unified_strdate,
	35	orderedSet,
	36	write_json_file,
	37	)
	38
	39	class YoutubeBaseInfoExtractor(InfoExtractor):
	40	"""Provide base functions for Youtube extractors"""
	41	_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
	42	_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	43	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	44	_NETRC_MACHINE = 'youtube'
	45	# If True it will raise an error if no login info is provided
	46	_LOGIN_REQUIRED = False
	47
	48	def report_lang(self):
	49	"""Report attempt to set language."""
	50	self.to_screen(u'Setting language')
	51
	52	def _set_language(self):
	53	request = compat_urllib_request.Request(self._LANG_URL)
	54	try:
	55	self.report_lang()
	56	compat_urllib_request.urlopen(request).read()
	57	except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
	58	self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
	59	return False
	60	return True
	61
	62	def _login(self):
	63	(username, password) = self._get_login_info()
	64	# No authentication to be performed
	65	if username is None:
	66	if self._LOGIN_REQUIRED:
	67	raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
	68	return False
	69
	70	request = compat_urllib_request.Request(self._LOGIN_URL)
	71	try:
	72	login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
	73	except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
	74	self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
	75	return False
	76
	77	galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
	78	login_page, u'Login GALX parameter')
	79
	80	# Log in
	81	login_form_strs = {
	82	u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
	83	u'Email': username,
	84	u'GALX': galx,
	85	u'Passwd': password,
	86	u'PersistentCookie': u'yes',
	87	u'_utf8': u'霱',
	88	u'bgresponse': u'js_disabled',
	89	u'checkConnection': u'',
	90	u'checkedDomains': u'youtube',
	91	u'dnConn': u'',
	92	u'pstMsg': u'0',
	93	u'rmShown': u'1',
	94	u'secTok': u'',
	95	u'signIn': u'Sign in',
	96	u'timeStmp': u'',
	97	u'service': u'youtube',
	98	u'uilel': u'3',
	99	u'hl': u'en_US',
	100	}
	101	# Convert to UTF-8 before urlencode because Python 2.x's urlencode
	102	# chokes on unicode
	103	login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
	104	login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
	105	request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
	106	try:
	107	self.report_login()
	108	login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
	109	if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
	110	self._downloader.report_warning(u'unable to log in: bad username or password')
	111	return False
	112	except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
	113	self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
	114	return False
	115	return True
	116
	117	def _confirm_age(self):
	118	age_form = {
	119	'next_url': '/',
	120	'action_confirm': 'Confirm',
	121	}
	122	request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
	123	try:
	124	self.report_age_confirmation()
	125	compat_urllib_request.urlopen(request).read().decode('utf-8')
	126	except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
	127	raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
	128	return True
	129
	130	def _real_initialize(self):
	131	if self._downloader is None:
	132	return
	133	if not self._set_language():
	134	return
	135	if not self._login():
	136	return
	137	self._confirm_age()
	138
	139
	140	class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
	141	IE_DESC = u'YouTube.com'
	142	_VALID_URL = r"""^
	143	(
	144	(?:https?://)? # http(s):// (optional)
	145	(?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/\|
	146	tube\.majestyc\.net/\|
	147	youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
	148	(?:.*?\#/)? # handle anchor (#/) redirect urls
	149	(?: # the various things that can precede the ID:
	150	(?:(?:v\|embed\|e)/) # v/ or embed/ or e/
	151	\|(?: # or the v= param in all its forms
	152	(?:(?:watch\|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup\|.php) or nothing (like /?v=xxxx)
	153	(?:\?\|\#!?) # the params delimiter ? or # or #!
	154	(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
	155	v=
	156	)
	157	))
	158	\|youtu\.be/ # just youtu.be/xxxx
	159	)
	160	)? # all until now is optional -> you can pass the naked ID
	161	([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
	162	(?(1).+)? # if we found the ID, everything can follow
	163	$"""
	164	_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
	165	# Listed in order of quality
	166	_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
	167	# Apple HTTP Live Streaming
	168	'96', '95', '94', '93', '92', '132', '151',
	169	# 3D
	170	'85', '84', '102', '83', '101', '82', '100',
	171	# Dash video
	172	'138', '137', '248', '136', '247', '135', '246',
	173	'245', '244', '134', '243', '133', '242', '160',
	174	# Dash audio
	175	'141', '172', '140', '171', '139',
	176	]
	177	_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
	178	# Apple HTTP Live Streaming
	179	'96', '95', '94', '93', '92', '132', '151',
	180	# 3D
	181	'85', '102', '84', '101', '83', '100', '82',
	182	# Dash video
	183	'138', '248', '137', '247', '136', '246', '245',
	184	'244', '135', '243', '134', '242', '133', '160',
	185	# Dash audio
	186	'172', '141', '171', '140', '139',
	187	]
	188	_video_formats_map = {
	189	'flv': ['35', '34', '6', '5'],
	190	'3gp': ['36', '17', '13'],
	191	'mp4': ['38', '37', '22', '18'],
	192	'webm': ['46', '45', '44', '43'],
	193	}
	194	_video_extensions = {
	195	'13': '3gp',
	196	'17': '3gp',
	197	'18': 'mp4',
	198	'22': 'mp4',
	199	'36': '3gp',
	200	'37': 'mp4',
	201	'38': 'mp4',
	202	'43': 'webm',
	203	'44': 'webm',
	204	'45': 'webm',
	205	'46': 'webm',
	206
	207	# 3d videos
	208	'82': 'mp4',
	209	'83': 'mp4',
	210	'84': 'mp4',
	211	'85': 'mp4',
	212	'100': 'webm',
	213	'101': 'webm',
	214	'102': 'webm',
	215
	216	# Apple HTTP Live Streaming
	217	'92': 'mp4',
	218	'93': 'mp4',
	219	'94': 'mp4',
	220	'95': 'mp4',
	221	'96': 'mp4',
	222	'132': 'mp4',
	223	'151': 'mp4',
	224
	225	# Dash mp4
	226	'133': 'mp4',
	227	'134': 'mp4',
	228	'135': 'mp4',
	229	'136': 'mp4',
	230	'137': 'mp4',
	231	'138': 'mp4',
	232	'160': 'mp4',
	233
	234	# Dash mp4 audio
	235	'139': 'm4a',
	236	'140': 'm4a',
	237	'141': 'm4a',
	238
	239	# Dash webm
	240	'171': 'webm',
	241	'172': 'webm',
	242	'242': 'webm',
	243	'243': 'webm',
	244	'244': 'webm',
	245	'245': 'webm',
	246	'246': 'webm',
	247	'247': 'webm',
	248	'248': 'webm',
	249	}
	250	_video_dimensions = {
	251	'5': '240x400',
	252	'6': '???',
	253	'13': '???',
	254	'17': '144x176',
	255	'18': '360x640',
	256	'22': '720x1280',
	257	'34': '360x640',
	258	'35': '480x854',
	259	'36': '240x320',
	260	'37': '1080x1920',
	261	'38': '3072x4096',
	262	'43': '360x640',
	263	'44': '480x854',
	264	'45': '720x1280',
	265	'46': '1080x1920',
	266	'82': '360p',
	267	'83': '480p',
	268	'84': '720p',
	269	'85': '1080p',
	270	'92': '240p',
	271	'93': '360p',
	272	'94': '480p',
	273	'95': '720p',
	274	'96': '1080p',
	275	'100': '360p',
	276	'101': '480p',
	277	'102': '720p',
	278	'132': '240p',
	279	'151': '72p',
	280	'133': '240p',
	281	'134': '360p',
	282	'135': '480p',
	283	'136': '720p',
	284	'137': '1080p',
	285	'138': '>1080p',
	286	'139': '48k',
	287	'140': '128k',
	288	'141': '256k',
	289	'160': '192p',
	290	'171': '128k',
	291	'172': '256k',
	292	'242': '240p',
	293	'243': '360p',
	294	'244': '480p',
	295	'245': '480p',
	296	'246': '480p',
	297	'247': '720p',
	298	'248': '1080p',
	299	}
	300	_special_itags = {
	301	'82': '3D',
	302	'83': '3D',
	303	'84': '3D',
	304	'85': '3D',
	305	'100': '3D',
	306	'101': '3D',
	307	'102': '3D',
	308	'133': 'DASH Video',
	309	'134': 'DASH Video',
	310	'135': 'DASH Video',
	311	'136': 'DASH Video',
	312	'137': 'DASH Video',
	313	'138': 'DASH Video',
	314	'139': 'DASH Audio',
	315	'140': 'DASH Audio',
	316	'141': 'DASH Audio',
	317	'160': 'DASH Video',
	318	'171': 'DASH Audio',
	319	'172': 'DASH Audio',
	320	'242': 'DASH Video',
	321	'243': 'DASH Video',
	322	'244': 'DASH Video',
	323	'245': 'DASH Video',
	324	'246': 'DASH Video',
	325	'247': 'DASH Video',
	326	'248': 'DASH Video',
	327	}
	328
	329	IE_NAME = u'youtube'
	330	_TESTS = [
	331	{
	332	u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
	333	u"file": u"BaW_jenozKc.mp4",
	334	u"info_dict": {
	335	u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
	336	u"uploader": u"Philipp Hagemeister",
	337	u"uploader_id": u"phihag",
	338	u"upload_date": u"20121002",
	339	u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
	340	}
	341	},
	342	{
	343	u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
	344	u"file": u"UxxajLWwzqY.mp4",
	345	u"note": u"Test generic use_cipher_signature video (#897)",
	346	u"info_dict": {
	347	u"upload_date": u"20120506",
	348	u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
	349	u"description": u"md5:5b292926389560516e384ac437c0ec07",
	350	u"uploader": u"Icona Pop",
	351	u"uploader_id": u"IconaPop"
	352	}
	353	},
	354	{
	355	u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
	356	u"file": u"07FYdnEawAQ.mp4",
	357	u"note": u"Test VEVO video with age protection (#956)",
	358	u"info_dict": {
	359	u"upload_date": u"20130703",
	360	u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
	361	u"description": u"md5:64249768eec3bc4276236606ea996373",
	362	u"uploader": u"justintimberlakeVEVO",
	363	u"uploader_id": u"justintimberlakeVEVO"
	364	}
	365	},
	366	]
	367
	368
	369	@classmethod
	370	def suitable(cls, url):
	371	"""Receives a URL and returns True if suitable for this IE."""
	372	if YoutubePlaylistIE.suitable(url): return False
	373	return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
	374
	375	def __init__(self, args, *kwargs):
	376	super(YoutubeIE, self).__init__(args, *kwargs)
	377	self._player_cache = {}
	378
	379	def report_video_webpage_download(self, video_id):
	380	"""Report attempt to download video webpage."""
	381	self.to_screen(u'%s: Downloading video webpage' % video_id)
	382
	383	def report_video_info_webpage_download(self, video_id):
	384	"""Report attempt to download video info webpage."""
	385	self.to_screen(u'%s: Downloading video info webpage' % video_id)
	386
	387	def report_information_extraction(self, video_id):
	388	"""Report attempt to extract video information."""
	389	self.to_screen(u'%s: Extracting video information' % video_id)
	390
	391	def report_unavailable_format(self, video_id, format):
	392	"""Report extracted video URL."""
	393	self.to_screen(u'%s: Format %s not available' % (video_id, format))
	394
	395	def report_rtmp_download(self):
	396	"""Indicate the download will use the RTMP protocol."""
	397	self.to_screen(u'RTMP download detected')
	398
	399	def _extract_signature_function(self, video_id, player_url, slen):
	400	id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
	401	player_url)
	402	player_type = id_m.group('ext')
	403	player_id = id_m.group('id')
	404
	405	# Read from filesystem cache
	406	func_id = '%s_%s_%d' % (player_type, player_id, slen)
	407	assert os.path.basename(func_id) == func_id
	408	cache_dir = get_cachedir(self._downloader.params)
	409
	410	cache_enabled = cache_dir is not None
	411	if cache_enabled:
	412	cache_fn = os.path.join(os.path.expanduser(cache_dir),
	413	u'youtube-sigfuncs',
	414	func_id + '.json')
	415	try:
	416	with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
	417	cache_spec = json.load(cachef)
	418	return lambda s: u''.join(s[i] for i in cache_spec)
	419	except IOError:
	420	pass # No cache available
	421
	422	if player_type == 'js':
	423	code = self._download_webpage(
	424	player_url, video_id,
	425	note=u'Downloading %s player %s' % (player_type, player_id),
	426	errnote=u'Download of %s failed' % player_url)
	427	res = self._parse_sig_js(code)
	428	elif player_type == 'swf':
	429	urlh = self._request_webpage(
	430	player_url, video_id,
	431	note=u'Downloading %s player %s' % (player_type, player_id),
	432	errnote=u'Download of %s failed' % player_url)
	433	code = urlh.read()
	434	res = self._parse_sig_swf(code)
	435	else:
	436	assert False, 'Invalid player type %r' % player_type
	437
	438	if cache_enabled:
	439	try:
	440	test_string = u''.join(map(compat_chr, range(slen)))
	441	cache_res = res(test_string)
	442	cache_spec = [ord(c) for c in cache_res]
	443	try:
	444	os.makedirs(os.path.dirname(cache_fn))
	445	except OSError as ose:
	446	if ose.errno != errno.EEXIST:
	447	raise
	448	write_json_file(cache_spec, cache_fn)
	449	except Exception:
	450	tb = traceback.format_exc()
	451	self._downloader.report_warning(
	452	u'Writing cache to %r failed: %s' % (cache_fn, tb))
	453
	454	return res
	455
	456	def _print_sig_code(self, func, slen):
	457	def gen_sig_code(idxs):
	458	def _genslice(start, end, step):
	459	starts = u'' if start == 0 else str(start)
	460	ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
	461	steps = u'' if step == 1 else (u':%d' % step)
	462	return u's[%s%s%s]' % (starts, ends, steps)
	463
	464	step = None
	465	start = '(Never used)' # Quelch pyflakes warnings - start will be
	466	# set as soon as step is set
	467	for i, prev in zip(idxs[1:], idxs[:-1]):
	468	if step is not None:
	469	if i - prev == step:
	470	continue
	471	yield _genslice(start, prev, step)
	472	step = None
	473	continue
	474	if i - prev in [-1, 1]:
	475	step = i - prev
	476	start = prev
	477	continue
	478	else:
	479	yield u's[%d]' % prev
	480	if step is None:
	481	yield u's[%d]' % i
	482	else:
	483	yield _genslice(start, i, step)
	484
	485	test_string = u''.join(map(compat_chr, range(slen)))
	486	cache_res = func(test_string)
	487	cache_spec = [ord(c) for c in cache_res]
	488	expr_code = u' + '.join(gen_sig_code(cache_spec))
	489	code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
	490	self.to_screen(u'Extracted signature function:\n' + code)
	491
	492	def _parse_sig_js(self, jscode):
	493	funcname = self._search_regex(
	494	r'signature=([a-zA-Z]+)', jscode,
	495	u'Initial JS player signature function name')
	496
	497	functions = {}
	498
	499	def argidx(varname):
	500	return string.lowercase.index(varname)
	501
	502	def interpret_statement(stmt, local_vars, allow_recursion=20):
	503	if allow_recursion < 0:
	504	raise ExtractorError(u'Recursion limit reached')
	505
	506	if stmt.startswith(u'var '):
	507	stmt = stmt[len(u'var '):]
	508	ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
	509	r'=(?P<expr>.*)$', stmt)
	510	if ass_m:
	511	if ass_m.groupdict().get('index'):
	512	def assign(val):
	513	lvar = local_vars[ass_m.group('out')]
	514	idx = interpret_expression(ass_m.group('index'),
	515	local_vars, allow_recursion)
	516	assert isinstance(idx, int)
	517	lvar[idx] = val
	518	return val
	519	expr = ass_m.group('expr')
	520	else:
	521	def assign(val):
	522	local_vars[ass_m.group('out')] = val
	523	return val
	524	expr = ass_m.group('expr')
	525	elif stmt.startswith(u'return '):
	526	assign = lambda v: v
	527	expr = stmt[len(u'return '):]
	528	else:
	529	raise ExtractorError(
	530	u'Cannot determine left side of statement in %r' % stmt)
	531
	532	v = interpret_expression(expr, local_vars, allow_recursion)
	533	return assign(v)
	534
	535	def interpret_expression(expr, local_vars, allow_recursion):
	536	if expr.isdigit():
	537	return int(expr)
	538
	539	if expr.isalpha():
	540	return local_vars[expr]
	541
	542	m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
	543	if m:
	544	member = m.group('member')
	545	val = local_vars[m.group('in')]
	546	if member == 'split("")':
	547	return list(val)
	548	if member == 'join("")':
	549	return u''.join(val)
	550	if member == 'length':
	551	return len(val)
	552	if member == 'reverse()':
	553	return val[::-1]
	554	slice_m = re.match(r'slice$(?P<idx>.*)$', member)
	555	if slice_m:
	556	idx = interpret_expression(
	557	slice_m.group('idx'), local_vars, allow_recursion-1)
	558	return val[idx:]
	559
	560	m = re.match(
	561	r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
	562	if m:
	563	val = local_vars[m.group('in')]
	564	idx = interpret_expression(m.group('idx'), local_vars,
	565	allow_recursion-1)
	566	return val[idx]
	567
	568	m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
	569	if m:
	570	a = interpret_expression(m.group('a'),
	571	local_vars, allow_recursion)
	572	b = interpret_expression(m.group('b'),
	573	local_vars, allow_recursion)
	574	return a % b
	575
	576	m = re.match(
	577	r'^(?P<func>[a-zA-Z]+)$(?P<args>[a-z0-9,]+)$$', expr)
	578	if m:
	579	fname = m.group('func')
	580	if fname not in functions:
	581	functions[fname] = extract_function(fname)
	582	argvals = [int(v) if v.isdigit() else local_vars[v]
	583	for v in m.group('args').split(',')]
	584	return functions[fname](argvals)
	585	raise ExtractorError(u'Unsupported JS expression %r' % expr)
	586
	587	def extract_function(funcname):
	588	func_m = re.search(
	589	r'function ' + re.escape(funcname) +
	590	r'$(?P<args>[a-z,]+)${(?P<code>[^}]+)}',
	591	jscode)
	592	argnames = func_m.group('args').split(',')
	593
	594	def resf(args):
	595	local_vars = dict(zip(argnames, args))
	596	for stmt in func_m.group('code').split(';'):
	597	res = interpret_statement(stmt, local_vars)
	598	return res
	599	return resf
	600
	601	initial_function = extract_function(funcname)
	602	return lambda s: initial_function([s])
	603
	604	def _parse_sig_swf(self, file_contents):
	605	if file_contents[1:3] != b'WS':
	606	raise ExtractorError(
	607	u'Not an SWF file; header is %r' % file_contents[:3])
	608	if file_contents[:1] == b'C':
	609	content = zlib.decompress(file_contents[8:])
	610	else:
	611	raise NotImplementedError(u'Unsupported compression format %r' %
	612	file_contents[:1])
	613
	614	def extract_tags(content):
	615	pos = 0
	616	while pos < len(content):
	617	header16 = struct.unpack('<H', content[pos:pos+2])[0]
	618	pos += 2
	619	tag_code = header16 >> 6
	620	tag_len = header16 & 0x3f
	621	if tag_len == 0x3f:
	622	tag_len = struct.unpack('<I', content[pos:pos+4])[0]
	623	pos += 4
	624	assert pos+tag_len <= len(content)
	625	yield (tag_code, content[pos:pos+tag_len])
	626	pos += tag_len
	627
	628	code_tag = next(tag
	629	for tag_code, tag in extract_tags(content)
	630	if tag_code == 82)
	631	p = code_tag.index(b'\0', 4) + 1
	632	code_reader = io.BytesIO(code_tag[p:])
	633
	634	# Parse ABC (AVM2 ByteCode)
	635	def read_int(reader=None):
	636	if reader is None:
	637	reader = code_reader
	638	res = 0
	639	shift = 0
	640	for _ in range(5):
	641	buf = reader.read(1)
	642	assert len(buf) == 1
	643	b = struct.unpack('<B', buf)[0]
	644	res = res \| ((b & 0x7f) << shift)
	645	if b & 0x80 == 0:
	646	break
	647	shift += 7
	648	return res
	649
	650	def u30(reader=None):
	651	res = read_int(reader)
	652	assert res & 0xf0000000 == 0
	653	return res
	654	u32 = read_int
	655
	656	def s32(reader=None):
	657	v = read_int(reader)
	658	if v & 0x80000000 != 0:
	659	v = - ((v ^ 0xffffffff) + 1)
	660	return v
	661
	662	def read_string(reader=None):
	663	if reader is None:
	664	reader = code_reader
	665	slen = u30(reader)
	666	resb = reader.read(slen)
	667	assert len(resb) == slen
	668	return resb.decode('utf-8')
	669
	670	def read_bytes(count, reader=None):
	671	if reader is None:
	672	reader = code_reader
	673	resb = reader.read(count)
	674	assert len(resb) == count
	675	return resb
	676
	677	def read_byte(reader=None):
	678	resb = read_bytes(1, reader=reader)
	679	res = struct.unpack('<B', resb)[0]
	680	return res
	681
	682	# minor_version + major_version
	683	read_bytes(2 + 2)
	684
	685	# Constant pool
	686	int_count = u30()
	687	for _c in range(1, int_count):
	688	s32()
	689	uint_count = u30()
	690	for _c in range(1, uint_count):
	691	u32()
	692	double_count = u30()
	693	read_bytes((double_count-1) * 8)
	694	string_count = u30()
	695	constant_strings = [u'']
	696	for _c in range(1, string_count):
	697	s = read_string()
	698	constant_strings.append(s)
	699	namespace_count = u30()
	700	for _c in range(1, namespace_count):
	701	read_bytes(1) # kind
	702	u30() # name
	703	ns_set_count = u30()
	704	for _c in range(1, ns_set_count):
	705	count = u30()
	706	for _c2 in range(count):
	707	u30()
	708	multiname_count = u30()
	709	MULTINAME_SIZES = {
	710	0x07: 2, # QName
	711	0x0d: 2, # QNameA
	712	0x0f: 1, # RTQName
	713	0x10: 1, # RTQNameA
	714	0x11: 0, # RTQNameL
	715	0x12: 0, # RTQNameLA
	716	0x09: 2, # Multiname
	717	0x0e: 2, # MultinameA
	718	0x1b: 1, # MultinameL
	719	0x1c: 1, # MultinameLA
	720	}
	721	multinames = [u'']
	722	for _c in range(1, multiname_count):
	723	kind = u30()
	724	assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
	725	if kind == 0x07:
	726	u30() # namespace_idx
	727	name_idx = u30()
	728	multinames.append(constant_strings[name_idx])
	729	else:
	730	multinames.append('[MULTINAME kind: %d]' % kind)
	731	for _c2 in range(MULTINAME_SIZES[kind]):
	732	u30()
	733
	734	# Methods
	735	method_count = u30()
	736	MethodInfo = collections.namedtuple(
	737	'MethodInfo',
	738	['NEED_ARGUMENTS', 'NEED_REST'])
	739	method_infos = []
	740	for method_id in range(method_count):
	741	param_count = u30()
	742	u30() # return type
	743	for _ in range(param_count):
	744	u30() # param type
	745	u30() # name index (always 0 for youtube)
	746	flags = read_byte()
	747	if flags & 0x08 != 0:
	748	# Options present
	749	option_count = u30()
	750	for c in range(option_count):
	751	u30() # val
	752	read_bytes(1) # kind
	753	if flags & 0x80 != 0:
	754	# Param names present
	755	for _ in range(param_count):
	756	u30() # param name
	757	mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
	758	method_infos.append(mi)
	759
	760	# Metadata
	761	metadata_count = u30()
	762	for _c in range(metadata_count):
	763	u30() # name
	764	item_count = u30()
	765	for _c2 in range(item_count):
	766	u30() # key
	767	u30() # value
	768
	769	def parse_traits_info():
	770	trait_name_idx = u30()
	771	kind_full = read_byte()
	772	kind = kind_full & 0x0f
	773	attrs = kind_full >> 4
	774	methods = {}
	775	if kind in [0x00, 0x06]: # Slot or Const
	776	u30() # Slot id
	777	u30() # type_name_idx
	778	vindex = u30()
	779	if vindex != 0:
	780	read_byte() # vkind
	781	elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
	782	u30() # disp_id
	783	method_idx = u30()
	784	methods[multinames[trait_name_idx]] = method_idx
	785	elif kind == 0x04: # Class
	786	u30() # slot_id
	787	u30() # classi
	788	elif kind == 0x05: # Function
	789	u30() # slot_id
	790	function_idx = u30()
	791	methods[function_idx] = multinames[trait_name_idx]
	792	else:
	793	raise ExtractorError(u'Unsupported trait kind %d' % kind)
	794
	795	if attrs & 0x4 != 0: # Metadata present
	796	metadata_count = u30()
	797	for _c3 in range(metadata_count):
	798	u30() # metadata index
	799
	800	return methods
	801
	802	# Classes
	803	TARGET_CLASSNAME = u'SignatureDecipher'
	804	searched_idx = multinames.index(TARGET_CLASSNAME)
	805	searched_class_id = None
	806	class_count = u30()
	807	for class_id in range(class_count):
	808	name_idx = u30()
	809	if name_idx == searched_idx:
	810	# We found the class we're looking for!
	811	searched_class_id = class_id
	812	u30() # super_name idx
	813	flags = read_byte()
	814	if flags & 0x08 != 0: # Protected namespace is present
	815	u30() # protected_ns_idx
	816	intrf_count = u30()
	817	for _c2 in range(intrf_count):
	818	u30()
	819	u30() # iinit
	820	trait_count = u30()
	821	for _c2 in range(trait_count):
	822	parse_traits_info()
	823
	824	if searched_class_id is None:
	825	raise ExtractorError(u'Target class %r not found' %
	826	TARGET_CLASSNAME)
	827
	828	method_names = {}
	829	method_idxs = {}
	830	for class_id in range(class_count):
	831	u30() # cinit
	832	trait_count = u30()
	833	for _c2 in range(trait_count):
	834	trait_methods = parse_traits_info()
	835	if class_id == searched_class_id:
	836	method_names.update(trait_methods.items())
	837	method_idxs.update(dict(
	838	(idx, name)
	839	for name, idx in trait_methods.items()))
	840
	841	# Scripts
	842	script_count = u30()
	843	for _c in range(script_count):
	844	u30() # init
	845	trait_count = u30()
	846	for _c2 in range(trait_count):
	847	parse_traits_info()
	848
	849	# Method bodies
	850	method_body_count = u30()
	851	Method = collections.namedtuple('Method', ['code', 'local_count'])
	852	methods = {}
	853	for _c in range(method_body_count):
	854	method_idx = u30()
	855	u30() # max_stack
	856	local_count = u30()
	857	u30() # init_scope_depth
	858	u30() # max_scope_depth
	859	code_length = u30()
	860	code = read_bytes(code_length)
	861	if method_idx in method_idxs:
	862	m = Method(code, local_count)
	863	methods[method_idxs[method_idx]] = m
	864	exception_count = u30()
	865	for _c2 in range(exception_count):
	866	u30() # from
	867	u30() # to
	868	u30() # target
	869	u30() # exc_type
	870	u30() # var_name
	871	trait_count = u30()
	872	for _c2 in range(trait_count):
	873	parse_traits_info()
	874
	875	assert p + code_reader.tell() == len(code_tag)
	876	assert len(methods) == len(method_idxs)
	877
	878	method_pyfunctions = {}
	879
	880	def extract_function(func_name):
	881	if func_name in method_pyfunctions:
	882	return method_pyfunctions[func_name]
	883	if func_name not in methods:
	884	raise ExtractorError(u'Cannot find function %r' % func_name)
	885	m = methods[func_name]
	886
	887	def resfunc(args):
	888	registers = ['(this)'] + list(args) + [None] * m.local_count
	889	stack = []
	890	coder = io.BytesIO(m.code)
	891	while True:
	892	opcode = struct.unpack('!B', coder.read(1))[0]
	893	if opcode == 36: # pushbyte
	894	v = struct.unpack('!B', coder.read(1))[0]
	895	stack.append(v)
	896	elif opcode == 44: # pushstring
	897	idx = u30(coder)
	898	stack.append(constant_strings[idx])
	899	elif opcode == 48: # pushscope
	900	# We don't implement the scope register, so we'll just
	901	# ignore the popped value
	902	stack.pop()
	903	elif opcode == 70: # callproperty
	904	index = u30(coder)
	905	mname = multinames[index]
	906	arg_count = u30(coder)
	907	args = list(reversed(
	908	[stack.pop() for _ in range(arg_count)]))
	909	obj = stack.pop()
	910	if mname == u'split':
	911	assert len(args) == 1
	912	assert isinstance(args[0], compat_str)
	913	assert isinstance(obj, compat_str)
	914	if args[0] == u'':
	915	res = list(obj)
	916	else:
	917	res = obj.split(args[0])
	918	stack.append(res)
	919	elif mname == u'slice':
	920	assert len(args) == 1
	921	assert isinstance(args[0], int)
	922	assert isinstance(obj, list)
	923	res = obj[args[0]:]
	924	stack.append(res)
	925	elif mname == u'join':
	926	assert len(args) == 1
	927	assert isinstance(args[0], compat_str)
	928	assert isinstance(obj, list)
	929	res = args[0].join(obj)
	930	stack.append(res)
	931	elif mname in method_pyfunctions:
	932	stack.append(method_pyfunctions[mname](args))
	933	else:
	934	raise NotImplementedError(
	935	u'Unsupported property %r on %r'
	936	% (mname, obj))
	937	elif opcode == 72: # returnvalue
	938	res = stack.pop()
	939	return res
	940	elif opcode == 79: # callpropvoid
	941	index = u30(coder)
	942	mname = multinames[index]
	943	arg_count = u30(coder)
	944	args = list(reversed(
	945	[stack.pop() for _ in range(arg_count)]))
	946	obj = stack.pop()
	947	if mname == u'reverse':
	948	assert isinstance(obj, list)
	949	obj.reverse()
	950	else:
	951	raise NotImplementedError(
	952	u'Unsupported (void) property %r on %r'
	953	% (mname, obj))
	954	elif opcode == 93: # findpropstrict
	955	index = u30(coder)
	956	mname = multinames[index]
	957	res = extract_function(mname)
	958	stack.append(res)
	959	elif opcode == 97: # setproperty
	960	index = u30(coder)
	961	value = stack.pop()
	962	idx = stack.pop()
	963	obj = stack.pop()
	964	assert isinstance(obj, list)
	965	assert isinstance(idx, int)
	966	obj[idx] = value
	967	elif opcode == 98: # getlocal
	968	index = u30(coder)
	969	stack.append(registers[index])
	970	elif opcode == 99: # setlocal
	971	index = u30(coder)
	972	value = stack.pop()
	973	registers[index] = value
	974	elif opcode == 102: # getproperty
	975	index = u30(coder)
	976	pname = multinames[index]
	977	if pname == u'length':
	978	obj = stack.pop()
	979	assert isinstance(obj, list)
	980	stack.append(len(obj))
	981	else: # Assume attribute access
	982	idx = stack.pop()
	983	assert isinstance(idx, int)
	984	obj = stack.pop()
	985	assert isinstance(obj, list)
	986	stack.append(obj[idx])
	987	elif opcode == 128: # coerce
	988	u30(coder)
	989	elif opcode == 133: # coerce_s
	990	assert isinstance(stack[-1], (type(None), compat_str))
	991	elif opcode == 164: # modulo
	992	value2 = stack.pop()
	993	value1 = stack.pop()
	994	res = value1 % value2
	995	stack.append(res)
	996	elif opcode == 208: # getlocal_0
	997	stack.append(registers[0])
	998	elif opcode == 209: # getlocal_1
	999	stack.append(registers[1])
	1000	elif opcode == 210: # getlocal_2
	1001	stack.append(registers[2])
	1002	elif opcode == 211: # getlocal_3
	1003	stack.append(registers[3])
	1004	elif opcode == 214: # setlocal_2
	1005	registers[2] = stack.pop()
	1006	elif opcode == 215: # setlocal_3
	1007	registers[3] = stack.pop()
	1008	else:
	1009	raise NotImplementedError(
	1010	u'Unsupported opcode %d' % opcode)
	1011
	1012	method_pyfunctions[func_name] = resfunc
	1013	return resfunc
	1014
	1015	initial_function = extract_function(u'decipher')
	1016	return lambda s: initial_function([s])
	1017
	1018	def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
	1019	"""Turn the encrypted s field into a working signature"""
	1020
	1021	if player_url is not None:
	1022	if player_url.startswith(u'//'):
	1023	player_url = u'https:' + player_url
	1024	try:
	1025	player_id = (player_url, len(s))
	1026	if player_id not in self._player_cache:
	1027	func = self._extract_signature_function(
	1028	video_id, player_url, len(s)
	1029	)
	1030	self._player_cache[player_id] = func
	1031	func = self._player_cache[player_id]
	1032	if self._downloader.params.get('youtube_print_sig_code'):
	1033	self._print_sig_code(func, len(s))
	1034	return func(s)
	1035	except Exception:
	1036	tb = traceback.format_exc()
	1037	self._downloader.report_warning(
	1038	u'Automatic signature extraction failed: ' + tb)
	1039
	1040	self._downloader.report_warning(
	1041	u'Warning: Falling back to static signature algorithm')
	1042
	1043	return self._static_decrypt_signature(
	1044	s, video_id, player_url, age_gate)
	1045
	1046	def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
	1047	if age_gate:
	1048	# The videos with age protection use another player, so the
	1049	# algorithms can be different.
	1050	if len(s) == 86:
	1051	return s[2:63] + s[82] + s[64:82] + s[63]
	1052
	1053	if len(s) == 93:
	1054	return s[86:29:-1] + s[88] + s[28:5:-1]
	1055	elif len(s) == 92:
	1056	return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
	1057	elif len(s) == 91:
	1058	return s[84:27:-1] + s[86] + s[26:5:-1]
	1059	elif len(s) == 90:
	1060	return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
	1061	elif len(s) == 89:
	1062	return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
	1063	elif len(s) == 88:
	1064	return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
	1065	elif len(s) == 87:
	1066	return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
	1067	elif len(s) == 86:
	1068	return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
	1069	elif len(s) == 85:
	1070	return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
	1071	elif len(s) == 84:
	1072	return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
	1073	elif len(s) == 83:
	1074	return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
	1075	elif len(s) == 82:
	1076	return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
	1077	elif len(s) == 81:
	1078	return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
	1079	elif len(s) == 80:
	1080	return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
	1081	elif len(s) == 79:
	1082	return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
	1083
	1084	else:
	1085	raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
	1086
	1087	def _get_available_subtitles(self, video_id, webpage):
	1088	try:
	1089	sub_list = self._download_webpage(
	1090	'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
	1091	video_id, note=False)
	1092	except ExtractorError as err:
	1093	self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
	1094	return {}
	1095	lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
	1096
	1097	sub_lang_list = {}
	1098	for l in lang_list:
	1099	lang = l[1]
	1100	params = compat_urllib_parse.urlencode({
	1101	'lang': lang,
	1102	'v': video_id,
	1103	'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
	1104	'name': l[0].encode('utf-8'),
	1105	})
	1106	url = u'http://www.youtube.com/api/timedtext?' + params
	1107	sub_lang_list[lang] = url
	1108	if not sub_lang_list:
	1109	self._downloader.report_warning(u'video doesn\'t have subtitles')
	1110	return {}
	1111	return sub_lang_list
	1112
	1113	def _get_available_automatic_caption(self, video_id, webpage):
	1114	"""We need the webpage for getting the captions url, pass it as an
	1115	argument to speed up the process."""
	1116	sub_format = self._downloader.params.get('subtitlesformat', 'srt')
	1117	self.to_screen(u'%s: Looking for automatic captions' % video_id)
	1118	mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
	1119	err_msg = u'Couldn\'t find automatic captions for %s' % video_id
	1120	if mobj is None:
	1121	self._downloader.report_warning(err_msg)
	1122	return {}
	1123	player_config = json.loads(mobj.group(1))
	1124	try:
	1125	args = player_config[u'args']
	1126	caption_url = args[u'ttsurl']
	1127	timestamp = args[u'timestamp']
	1128	# We get the available subtitles
	1129	list_params = compat_urllib_parse.urlencode({
	1130	'type': 'list',
	1131	'tlangs': 1,
	1132	'asrs': 1,
	1133	})
	1134	list_url = caption_url + '&' + list_params
	1135	list_page = self._download_webpage(list_url, video_id)
	1136	caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
	1137	original_lang_node = caption_list.find('track')
	1138	if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
	1139	self._downloader.report_warning(u'Video doesn\'t have automatic captions')
	1140	return {}
	1141	original_lang = original_lang_node.attrib['lang_code']
	1142
	1143	sub_lang_list = {}
	1144	for lang_node in caption_list.findall('target'):
	1145	sub_lang = lang_node.attrib['lang_code']
	1146	params = compat_urllib_parse.urlencode({
	1147	'lang': original_lang,
	1148	'tlang': sub_lang,
	1149	'fmt': sub_format,
	1150	'ts': timestamp,
	1151	'kind': 'asr',
	1152	})
	1153	sub_lang_list[sub_lang] = caption_url + '&' + params
	1154	return sub_lang_list
	1155	# An extractor error can be raise by the download process if there are
	1156	# no automatic captions but there are subtitles
	1157	except (KeyError, ExtractorError):
	1158	self._downloader.report_warning(err_msg)
	1159	return {}
	1160
	1161	def _print_formats(self, formats):
	1162	print('Available formats:')
	1163	for x in formats:
	1164	print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
	1165	self._video_dimensions.get(x, '???'),
	1166	' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
	1167
	1168	def _extract_id(self, url):
	1169	mobj = re.match(self._VALID_URL, url, re.VERBOSE)
	1170	if mobj is None:
	1171	raise ExtractorError(u'Invalid URL: %s' % url)
	1172	video_id = mobj.group(2)
	1173	return video_id
	1174
	1175	def _get_video_url_list(self, url_map):
	1176	"""
	1177	Transform a dictionary in the format {itag:url} to a list of (itag, url)
	1178	with the requested formats.
	1179	"""
	1180	req_format = self._downloader.params.get('format', None)
	1181	format_limit = self._downloader.params.get('format_limit', None)
	1182	available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
	1183	if format_limit is not None and format_limit in available_formats:
	1184	format_list = available_formats[available_formats.index(format_limit):]
	1185	else:
	1186	format_list = available_formats
	1187	existing_formats = [x for x in format_list if x in url_map]
	1188	if len(existing_formats) == 0:
	1189	raise ExtractorError(u'no known formats available for video')
	1190	if self._downloader.params.get('listformats', None):
	1191	self._print_formats(existing_formats)
	1192	return
	1193	if req_format is None or req_format == 'best':
	1194	video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
	1195	elif req_format == 'worst':
	1196	video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
	1197	elif req_format in ('-1', 'all'):
	1198	video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
	1199	else:
	1200	# Specific formats. We pick the first in a slash-delimeted sequence.
	1201	# Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
	1202	# available in the specified format. For example,
	1203	# if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
	1204	# if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
	1205	# if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
	1206	req_formats = req_format.split('/')
	1207	video_url_list = None
	1208	for rf in req_formats:
	1209	if rf in url_map:
	1210	video_url_list = [(rf, url_map[rf])]
	1211	break
	1212	if rf in self._video_formats_map:
	1213	for srf in self._video_formats_map[rf]:
	1214	if srf in url_map:
	1215	video_url_list = [(srf, url_map[srf])]
	1216	break
	1217	else:
	1218	continue
	1219	break
	1220	if video_url_list is None:
	1221	raise ExtractorError(u'requested format not available')
	1222	return video_url_list
	1223
	1224	def _extract_from_m3u8(self, manifest_url, video_id):
	1225	url_map = {}
	1226	def _get_urls(_manifest):
	1227	lines = _manifest.split('\n')
	1228	urls = filter(lambda l: l and not l.startswith('#'),
	1229	lines)
	1230	return urls
	1231	manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
	1232	formats_urls = _get_urls(manifest)
	1233	for format_url in formats_urls:
	1234	itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
	1235	url_map[itag] = format_url
	1236	return url_map
	1237
	1238	def _extract_annotations(self, video_id):
	1239	url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
	1240	return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
	1241
	def _real_extract(self, url):
	    """Extract the information of a single YouTube video.

	    Downloads the watch page and the get_video_info data, collects the
	    metadata (uploader, title, thumbnail, upload date, description,
	    subtitles, duration, annotations), builds the itag -> URL map
	    (deciphering encrypted signatures where needed) and selects the
	    requested formats.

	    Returns a list with one info dictionary per selected format, or
	    None when only a listing was requested (the 'listsubtitles' or
	    'listformats' options).  Raises ExtractorError when the video page
	    or the video information cannot be obtained or used.
	    """
	    # Extract original video URL from URL with redirection, like age verification, using next_url parameter
	    mobj = re.search(self._NEXT_URL_RE, url)
	    if mobj:
	        url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
	    video_id = self._extract_id(url)

	    # Get video webpage
	    self.report_video_webpage_download(video_id)
	    url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
	    request = compat_urllib_request.Request(url)
	    try:
	        video_webpage_bytes = compat_urllib_request.urlopen(request).read()
	    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
	        raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))

	    video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')

	    # Attempt to extract SWF player URL
	    mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
	    if mobj is not None:
	        # The URL is embedded with backslash escapes; drop the backslashes
	        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
	    else:
	        player_url = None

	    # Get video info
	    self.report_video_info_webpage_download(video_id)
	    if re.search(r'player-age-gate-content">', video_webpage) is not None:
	        self.report_age_confirmation()
	        age_gate = True
	        # We simulate the access to the video from www.youtube.com/v/{video_id}
	        # this can be viewed without login into Youtube
	        data = compat_urllib_parse.urlencode({
	            'video_id': video_id,
	            'el': 'embedded',
	            'gl': 'US',
	            'hl': 'en',
	            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
	            'asv': 3,
	            'sts': '1588',
	        })
	        video_info_url = 'https://www.youtube.com/get_video_info?' + data
	        video_info_webpage = self._download_webpage(video_info_url, video_id,
	                                note=False,
	                                errnote='unable to download video info webpage')
	        video_info = compat_parse_qs(video_info_webpage)
	    else:
	        age_gate = False
	        # Try the different "el" values until one answer carries a token
	        for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
	            video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
	                    % (video_id, el_type))
	            video_info_webpage = self._download_webpage(video_info_url, video_id,
	                                    note=False,
	                                    errnote='unable to download video info webpage')
	            video_info = compat_parse_qs(video_info_webpage)
	            if 'token' in video_info:
	                break
	    if 'token' not in video_info:
	        if 'reason' in video_info:
	            raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
	        else:
	            raise ExtractorError(u'"token" parameter not in video info for unknown reason')

	    if 'view_count' in video_info:
	        view_count = int(video_info['view_count'][0])
	    else:
	        view_count = None

	    # Check for "rental" videos
	    if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
	        raise ExtractorError(u'"rental" videos not supported')

	    # Start extracting information
	    self.report_information_extraction(video_id)

	    # uploader
	    if 'author' not in video_info:
	        raise ExtractorError(u'Unable to extract uploader name')
	    video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

	    # uploader_id
	    video_uploader_id = None
	    mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
	    if mobj is not None:
	        video_uploader_id = mobj.group(1)
	    else:
	        self._downloader.report_warning(u'unable to extract uploader nickname')

	    # title
	    if 'title' in video_info:
	        video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
	    else:
	        self._downloader.report_warning(u'Unable to extract video title')
	        video_title = u'_'

	    # thumbnail image
	    # We try first to get a high quality image:
	    m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
	                        video_webpage, re.DOTALL)
	    if m_thumb is not None:
	        video_thumbnail = m_thumb.group(1)
	    elif 'thumbnail_url' not in video_info:
	        self._downloader.report_warning(u'unable to extract video thumbnail')
	        video_thumbnail = None
	    else:   # don't panic if we can't find it
	        video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

	    # upload date
	    upload_date = None
	    mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
	    if mobj is not None:
	        # Replace the separators by spaces and collapse whitespace
	        upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
	        upload_date = unified_strdate(upload_date)

	    # description
	    video_description = get_element_by_id("eow-description", video_webpage)
	    if video_description:
	        video_description = clean_html(video_description)
	    else:
	        fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
	        if fd_mobj:
	            video_description = unescapeHTML(fd_mobj.group(1))
	        else:
	            video_description = u''

	    # subtitles
	    video_subtitles = self.extract_subtitles(video_id, video_webpage)

	    if self._downloader.params.get('listsubtitles', False):
	        self._list_available_subtitles(video_id, video_webpage)
	        return

	    if 'length_seconds' not in video_info:
	        self._downloader.report_warning(u'unable to extract video duration')
	        video_duration = ''
	    else:
	        video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])

	    # annotations
	    video_annotations = None
	    if self._downloader.params.get('writeannotations', False):
	        video_annotations = self._extract_annotations(video_id)

	    # Decide which formats to download

	    # When the player configuration of the watch page carries encrypted
	    # signatures, its stream maps are used instead of (or in addition to)
	    # the ones from get_video_info.
	    try:
	        mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
	        if not mobj:
	            raise ValueError('Could not find vevo ID')
	        info = json.loads(mobj.group(1))
	        args = info['args']
	        # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
	        # this signatures are encrypted
	        if 'url_encoded_fmt_stream_map' not in args:
	            raise ValueError(u'No stream_map present')  # caught below
	        re_signature = re.compile(r'[&,]s=')
	        m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
	        if m_s is not None:
	            self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
	            video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
	        m_s = re_signature.search(args.get('adaptive_fmts', u''))
	        if m_s is not None:
	            if 'adaptive_fmts' in video_info:
	                video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
	            else:
	                video_info['adaptive_fmts'] = [args['adaptive_fmts']]
	    except ValueError:
	        # Best effort only: without a usable player configuration the
	        # data from get_video_info is used unchanged.
	        pass

	    if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
	        self.report_rtmp_download()
	        video_url_list = [(None, video_info['conn'][0])]
	    elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
	        encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
	        if 'rtmpe%3Dyes' in encoded_url_map:
	            raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
	        url_map = {}
	        for url_data_str in encoded_url_map.split(','):
	            url_data = compat_parse_qs(url_data_str)
	            if 'itag' in url_data and 'url' in url_data:
	                url = url_data['url'][0]
	                if 'sig' in url_data:
	                    # Plain signature, usable as it is
	                    url += '&signature=' + url_data['sig'][0]
	                elif 's' in url_data:
	                    # Encrypted signature, has to be deciphered first
	                    encrypted_sig = url_data['s'][0]
	                    if self._downloader.params.get('verbose'):
	                        if age_gate:
	                            if player_url is None:
	                                player_version = 'unknown'
	                            else:
	                                player_version = self._search_regex(
	                                    r'-(.+)\.swf$', player_url,
	                                    u'flash player', fatal=False)
	                            player_desc = 'flash player %s' % player_version
	                        else:
	                            player_version = self._search_regex(
	                                r'html5player-(.+?)\.js', video_webpage,
	                                'html5 player', fatal=False)
	                            player_desc = u'html5 player %s' % player_version

	                        parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
	                        self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
	                            (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))

	                    if not age_gate:
	                        # Videos without age gate use the JS player named
	                        # in the page instead of the SWF player
	                        jsplayer_url_json = self._search_regex(
	                            r'"assets":.+?"js":\s*("[^"]+")',
	                            video_webpage, u'JS player URL')
	                        player_url = json.loads(jsplayer_url_json)

	                    signature = self._decrypt_signature(
	                        encrypted_sig, video_id, player_url, age_gate)
	                    url += '&signature=' + signature
	                if 'ratebypass' not in url:
	                    url += '&ratebypass=yes'
	                url_map[url_data['itag'][0]] = url
	        video_url_list = self._get_video_url_list(url_map)
	        if not video_url_list:
	            # Nothing to download, e.g. the formats were only listed
	            return
	    elif video_info.get('hlsvp'):
	        manifest_url = video_info['hlsvp'][0]
	        url_map = self._extract_from_m3u8(manifest_url, video_id)
	        video_url_list = self._get_video_url_list(url_map)
	        if not video_url_list:
	            return

	    else:
	        raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

	    results = []
	    for itag, video_real_url in video_url_list:
	        # Extension
	        video_extension = self._video_extensions.get(itag, 'flv')

	        video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
	                                          self._video_dimensions.get(itag, '???'),
	                                          ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')

	        results.append({
	            'id':           video_id,
	            'url':          video_real_url,
	            'uploader':     video_uploader,
	            'uploader_id':  video_uploader_id,
	            'upload_date':  upload_date,
	            'title':        video_title,
	            'ext':          video_extension,
	            'format':       video_format,
	            'format_id':    itag,
	            'thumbnail':    video_thumbnail,
	            'description':  video_description,
	            'player_url':   player_url,
	            'subtitles':    video_subtitles,
	            'duration':     video_duration,
	            'age_limit':    18 if age_gate else 0,
	            'annotations':  video_annotations,
	            'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
	            'view_count': view_count,
	        })
	    return results
	1500
	class YoutubePlaylistIE(InfoExtractor):
	    """Extractor for YouTube playlists, read through the GData API."""
	    IE_DESC = u'YouTube.com playlists'
	    # Verbose regex: group 1 is the id taken from a playlist-like URL,
	    # group 2 a bare playlist id (which must carry a known prefix).
	    _VALID_URL = r"""(?:
	                        (?:https?://)?
	                        (?:\w+\.)?
	                        youtube\.com/
	                        (?:
	                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
	                           \? (?:.*?&)*? (?:p|a|list)=
	                        |  p/
	                        )
	                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
	                        .*
	                     |
	                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
	                     )"""
	    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
	    _MAX_RESULTS = 50
	    IE_NAME = u'youtube:playlist'

	    @classmethod
	    def suitable(cls, url):
	        """Receives a URL and returns True if suitable for this IE."""
	        # _VALID_URL is a verbose regex, so it needs re.VERBOSE
	        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

	    def _real_extract(self, url):
	        """Return a one-element list holding the playlist result for url.

	        If url also names a video and the 'noplaylist' option is set,
	        a result pointing to that single video is returned instead.
	        Raises ExtractorError on an invalid URL or a bad API response.
	        """
	        # Extract playlist id
	        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
	        if mobj is None:
	            raise ExtractorError(u'Invalid URL: %s' % url)
	        playlist_id = mobj.group(1) or mobj.group(2)

	        # Check if it's a video-specific URL
	        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
	        if 'v' in query_dict:
	            video_id = query_dict['v'][0]
	            if self._downloader.params.get('noplaylist'):
	                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
	                return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
	            else:
	                self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))

	        # Download playlist videos from API
	        videos = []

	        for page_num in itertools.count(1):
	            start_index = self._MAX_RESULTS * (page_num - 1) + 1
	            if start_index >= 1000:
	                self._downloader.report_warning(u'Max number of results reached')
	                break
	            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
	            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)

	            try:
	                response = json.loads(page)
	            except ValueError as err:
	                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

	            if 'feed' not in response:
	                raise ExtractorError(u'Got a malformed response from YouTube API')
	            playlist_title = response['feed']['title']['$t']
	            if 'entry' not in response['feed']:
	                # Number of videos is a multiple of self._MAX_RESULTS
	                break

	            for entry in response['feed']['entry']:
	                index = entry['yt$position']['$t']
	                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
	                    videos.append((
	                        index,
	                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
	                    ))

	        # Order the videos by their position in the playlist
	        videos = [v[1] for v in sorted(videos)]

	        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
	        return [self.playlist_result(url_results, playlist_id, playlist_title)]
	1578
	1579
	class YoutubeChannelIE(InfoExtractor):
	    """Extractor for all the videos of a YouTube channel."""
	    IE_DESC = u'YouTube.com channels'
	    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
	    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
	    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
	    IE_NAME = u'youtube:channel'

	    def extract_videos_from_page(self, page):
	        """Return the video ids linked from page, without duplicates and
	        in order of first appearance."""
	        ids_in_page = []
	        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
	            if mobj.group(1) not in ids_in_page:
	                ids_in_page.append(mobj.group(1))
	        return ids_in_page

	    def _real_extract(self, url):
	        """Return a one-element list holding the playlist result with
	        every video of the channel.  Raises ExtractorError on an invalid
	        URL."""
	        # Extract channel id
	        mobj = re.match(self._VALID_URL, url)
	        if mobj is None:
	            raise ExtractorError(u'Invalid URL: %s' % url)

	        # Download channel page
	        channel_id = mobj.group(1)
	        video_ids = []
	        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
	        channel_page = self._download_webpage(url, channel_id)
	        autogenerated = re.search(r'channel-header-autogenerated-label', channel_page) is not None

	        if autogenerated:
	            # The videos are contained in a single page
	            # the ajax pages can't be used, they are empty
	            video_ids = self.extract_videos_from_page(channel_page)
	        else:
	            # Download all channel pages using the json-based channel_ajax query
	            for pagenum in itertools.count(1):
	                url = self._MORE_PAGES_URL % (pagenum, channel_id)
	                page = self._download_webpage(url, channel_id,
	                                              u'Downloading page #%s' % pagenum)

	                page = json.loads(page)

	                ids_in_page = self.extract_videos_from_page(page['content_html'])
	                video_ids.extend(ids_in_page)

	                # No "load more" widget means this was the last page
	                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
	                    break

	        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

	        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
	        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
	        return [self.playlist_result(url_entries, channel_id)]
	1634
	1635
	class YoutubeUserIE(InfoExtractor):
	    """Extractor for all the uploads of a YouTube user."""
	    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
	    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
	    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
	    _GDATA_PAGE_SIZE = 50
	    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
	    IE_NAME = u'youtube:user'

	    @classmethod
	    def suitable(cls, url):
	        """Return True only if no other extractor of this module accepts url."""
	        # Don't return True if the url can be extracted with another
	        # youtube extractor; this regex is too permissive and would
	        # match those URLs as well.
	        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
	        if any(ie.suitable(url) for ie in other_ies):
	            return False
	        else:
	            return super(YoutubeUserIE, cls).suitable(url)

	    def _real_extract(self, url):
	        """Return a one-element list holding the playlist result with
	        every upload of the user.  Raises ExtractorError on an invalid
	        URL or an API response that is not valid JSON."""
	        # Extract username
	        mobj = re.match(self._VALID_URL, url)
	        if mobj is None:
	            raise ExtractorError(u'Invalid URL: %s' % url)

	        username = mobj.group(1)

	        # Download video ids using YouTube Data API. Result size per
	        # query is limited (currently to 50 videos) so we need to query
	        # page by page until there are no video ids - it means we got
	        # all of them.

	        video_ids = []

	        for pagenum in itertools.count(0):
	            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

	            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
	            page = self._download_webpage(gdata_url, username,
	                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

	            try:
	                response = json.loads(page)
	            except ValueError as err:
	                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
	            if 'entry' not in response['feed']:
	                # Number of videos is a multiple of self._GDATA_PAGE_SIZE
	                break

	            # Extract video identifiers: the last path component of the entry id
	            ids_in_page = [entry['id']['$t'].split('/')[-1]
	                           for entry in response['feed']['entry']]
	            video_ids.extend(ids_in_page)

	            # A little optimization - if current page is not
	            # "full", ie. does not contain PAGE_SIZE video ids then
	            # we can assume that this page is the last one - there
	            # are no more ids on further pages - no need to query
	            # again.

	            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
	                break

	        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
	        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
	        return [self.playlist_result(url_results, playlist_title=username)]
	1700
	class YoutubeSearchIE(SearchInfoExtractor):
	    """Extractor for YouTube searches, answered by the GData API."""
	    IE_NAME = u'youtube:search'
	    IE_DESC = u'YouTube.com searches'
	    _SEARCH_KEY = 'ytsearch'
	    _MAX_RESULTS = 1000
	    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'

	    def report_download_page(self, query, pagenum):
	        """Report attempt to download search page with given number."""
	        message = u'[youtube] query "%s": Downloading page %s' % (query, pagenum)
	        self._downloader.to_screen(message)

	    def _get_n_results(self, query, n):
	        """Get a specified number of results for a query"""

	        video_ids = []
	        limit = n
	        pagenum = 0

	        # Every API page holds up to 50 results
	        while (50 * pagenum) < limit:
	            self.report_download_page(query, pagenum + 1)
	            result_url = self._API_URL % (
	                compat_urllib_parse.quote_plus(query), (50 * pagenum) + 1)
	            try:
	                raw = compat_urllib_request.urlopen(
	                    compat_urllib_request.Request(result_url)).read()
	                data = raw.decode('utf-8')
	            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
	                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
	            api_response = json.loads(data)['data']

	            if 'items' not in api_response:
	                raise ExtractorError(u'[youtube] No video results')

	            video_ids += [video['id'] for video in api_response['items']]

	            # Never ask for more results than the API says there are
	            limit = min(n, api_response['totalItems'])
	            pagenum += 1

	        if len(video_ids) > n:
	            video_ids = video_ids[:n]
	        videos = [
	            self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
	            for video_id in video_ids]
	        return self.playlist_result(videos, query)
	1742
	class YoutubeSearchDateIE(YoutubeSearchIE):
	    """Same as YoutubeSearchIE, but asks the API to order by publication date."""
	    IE_DESC = u'YouTube.com searches, newest videos first'
	    _SEARCH_KEY = 'ytsearchdate'
	    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
	1747
	class YoutubeShowIE(InfoExtractor):
	    """Extractor for YouTube shows; every season is handed on as a playlist."""
	    IE_NAME = u'youtube:show'
	    IE_DESC = u'YouTube.com (multi-season) shows'
	    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'

	    def _real_extract(self, url):
	        """Return one playlist URL result per season linked from the show page."""
	        show_name = re.match(self._VALID_URL, url).group(1)
	        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
	        # There's one playlist for each season of the show
	        season_paths = [
	            m.group(1)
	            for m in re.finditer(r'href="(/playlist\?list=.*?)"', webpage)]
	        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_paths)))
	        results = []
	        for path in season_paths:
	            results.append(
	                self.url_result('https://www.youtube.com' + path, 'YoutubePlaylist'))
	        return results
	1761
	1762
	class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
	    """
	    Base class for extractors that fetch info from
	    http://www.youtube.com/feed_ajax
	    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
	    """
	    _LOGIN_REQUIRED = True
	    _PAGING_STEP = 30
	    # use action_load_personal_feed instead of action_load_system_feed
	    _PERSONAL_FEED = False

	    @property
	    def _FEED_TEMPLATE(self):
	        """URL of the feed; the remaining %s takes the paging offset."""
	        if self._PERSONAL_FEED:
	            action = 'action_load_personal_feed'
	        else:
	            action = 'action_load_system_feed'
	        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

	    @property
	    def IE_NAME(self):
	        """Extractor name, derived from the feed name."""
	        return u'youtube:%s' % self._FEED_NAME

	    def _real_initialize(self):
	        self._login()

	    def _real_extract(self, url):
	        """Collect the videos of every feed page into one playlist result."""
	        feed_entries = []
	        # The step argument is available only in 2.7 or higher
	        for page_index in itertools.count(0):
	            page_url = self._FEED_TEMPLATE % (page_index * self._PAGING_STEP)
	            raw_page = self._download_webpage(page_url,
	                                              u'%s feed' % self._FEED_NAME,
	                                              u'Downloading page %s' % page_index)
	            info = json.loads(raw_page)
	            matches = re.finditer(r'"/watch\?v=(.*?)["&]', info['feed_html'])
	            for video_id in orderedSet(m.group(1) for m in matches):
	                feed_entries.append(self.url_result(video_id, 'Youtube'))
	            # A null 'paging' value marks the last page
	            if info['paging'] is None:
	                break
	        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
	1804
	class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
	    """Feed extractor for the subscriptions feed."""
	    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
	    # Either the feed URL or the ":ytsubs" / ":ytsubscriptions" keyword
	    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
	    _FEED_NAME = 'subscriptions'
	    _PLAYLIST_TITLE = u'Youtube Subscriptions'
	1810
	class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
	    """Feed extractor for the recommended videos feed."""
	    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
	    # Either the feed URL or the ":ytrec" / ":ytrecommended" keyword
	    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
	    _FEED_NAME = 'recommended'
	    _PLAYLIST_TITLE = u'Youtube Recommended videos'
	1816
	class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
	    """Feed extractor for the watch later list (a personal feed)."""
	    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
	    # Either the feed URL or the ":ytwatchlater" keyword
	    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
	    _FEED_NAME = 'watch_later'
	    _PLAYLIST_TITLE = u'Youtube Watch Later'
	    _PAGING_STEP = 100
	    _PERSONAL_FEED = True
	1824
	class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
	    """Extractor for the favourite videos of the logged-in user."""
	    IE_NAME = u'youtube:favorites'
	    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
	    # Either the favourites URL or the ":ytfav" / ":ytfavorites" / ":ytfavourites" keyword
	    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
	    _LOGIN_REQUIRED = True

	    def _real_extract(self, url):
	        """Find the favourites playlist id and hand it to the playlist extractor."""
	        webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
	        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
	        return self.url_result(playlist_id, 'YoutubePlaylist')
	1835
	1836
	class YoutubeTruncatedURLIE(InfoExtractor):
	    """Catches watch URLs that end right after the feature parameter and
	    always answers them with an explanatory error."""
	    IE_NAME = 'youtube:truncated_url'
	    IE_DESC = False  # Do not list
	    _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$'

	    def _real_extract(self, url):
	        """Always raise an (expected) ExtractorError with a quoting hint."""
	        hint = (
	            u'Did you forget to quote the URL? Remember that & is a meta '
	            u'character in most shells, so you want to put the URL in quotes, '
	            u'like  youtube-dl '
	            u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\''
	            u' (or simply  youtube-dl BaW_jenozKc  ).')
	        raise ExtractorError(hint, expected=True)