]> jfr.im git - yt-dlp.git/blame_incremental - youtube_dl/extractor/youtube.py
release 2014.04.01.1
[yt-dlp.git] / youtube_dl / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3import collections
4import errno
5import io
6import itertools
7import json
8import os.path
9import re
10import struct
11import traceback
12import zlib
13
14from .common import InfoExtractor, SearchInfoExtractor
15from .subtitles import SubtitlesInfoExtractor
16from ..jsinterp import JSInterpreter
17from ..utils import (
18 compat_chr,
19 compat_parse_qs,
20 compat_urllib_parse,
21 compat_urllib_request,
22 compat_urlparse,
23 compat_str,
24
25 clean_html,
26 get_cachedir,
27 get_element_by_id,
28 get_element_by_attribute,
29 ExtractorError,
30 int_or_none,
31 PagedList,
32 unescapeHTML,
33 unified_strdate,
34 orderedSet,
35 write_json_file,
36 uppercase_escape,
37)
38
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Switch the YouTube interface to English; return True on success."""
        return bool(self._download_webpage(
            self._LANG_URL, None,
            note=u'Setting language', errnote='unable to set language',
            fatal=False))

    def _login(self):
        """Log in to YouTube with the configured credentials.

        Returns True if the login succeeded, False otherwise.  Raises
        ExtractorError when _LOGIN_REQUIRED is set but no credentials
        are available.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note=u'Downloading login page',
            errnote=u'unable to fetch login page', fatal=False)
        if login_page is False:
            # Fix: previously a bare `return` (None); return False so the
            # method consistently yields a boolean like every other path.
            return False

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, u'Login GALX parameter')

        # Log in
        login_form_strs = {
            u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            u'Email': username,
            u'GALX': galx,
            u'Passwd': password,
            u'PersistentCookie': u'yes',
            u'_utf8': u'霱',
            u'bgresponse': u'js_disabled',
            u'checkConnection': u'',
            u'checkedDomains': u'youtube',
            u'dnConn': u'',
            u'pstMsg': u'0',
            u'rmShown': u'1',
            u'secTok': u'',
            u'signIn': u'Sign in',
            u'timeStmp': u'',
            u'service': u'youtube',
            u'uilel': u'3',
            u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')

        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note=u'Logging in', errnote=u'unable to log in', fatal=False)
        if login_results is False:
            return False
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            # The login form is still present in the response, which means
            # authentication was rejected.
            self._downloader.report_warning(u'unable to log in: bad username or password')
            return False
        return True

    def _confirm_age(self):
        """Submit the age-verification form; always returns True."""
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        req = compat_urllib_request.Request(
            self._AGE_URL,
            compat_urllib_parse.urlencode(age_form).encode('ascii'))

        self._download_webpage(
            req, None,
            note=u'Confirming age', errnote=u'Unable to confirm age')
        return True

    def _real_initialize(self):
        # Set the interface language and log in before any extraction;
        # bail out early if either preparatory step fails.
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
131
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    IE_DESC = u'YouTube.com'
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                            (?:www\.)?deturl\.com/www\.youtube\.com/|
                            (?:www\.)?pwnyoutube\.com/|
                            (?:www\.)?yourepeat\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Known properties per itag.  Entries '167'-'170' and '218'/'219' used to
    # repeat the 'acodec': 'none' key twice inside one dict literal; the
    # duplicates (silently overriding the first occurrence) were removed.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240},
        '6': {'ext': 'flv', 'width': 450, 'height': 270},
        '13': {'ext': '3gp'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
        '34': {'ext': 'flv', 'width': 640, 'height': 360},
        '35': {'ext': 'flv', 'width': 854, 'height': 480},
        '36': {'ext': '3gp', 'width': 320, 'height': 240},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
        '43': {'ext': 'webm', 'width': 640, 'height': 360},
        '44': {'ext': 'webm', 'width': 854, 'height': 480},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080},


        # 3d videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},

        # Apple HTTP Live Streaming
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH webm', 'preference': -40},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH webm', 'preference': -40},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH webm', 'preference': -40},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH webm', 'preference': -40},

        # Dash webm audio
        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
        '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 256, 'preference': -50},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},
    }
233
    IE_NAME = u'youtube'
    # Integration-test fixtures consumed by the project's test harness;
    # each entry pins a URL to the metadata the extractor must produce.
    _TESTS = [
        {
            u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file": u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file": u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:5b292926389560516e384ac437c0ec07",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file": u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
        {
            u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
            u"file": u"yZIXLfi8CZQ.mp4",
            u"note": u"Embed-only video (#1746)",
            u"info_dict": {
                u"upload_date": u"20120608",
                u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
                u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
                u"uploader": u"SET India",
                u"uploader_id": u"setindia"
            }
        },
        {
            u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
            u"file": u"a9LDPn-MO4I.m4a",
            u"note": u"256k DASH audio (format 141) via DASH manifest",
            u"info_dict": {
                u"upload_date": "20121002",
                u"uploader_id": "8KVIDEO",
                u"description": "No description available.",
                u"uploader": "8KVIDEO",
                u"title": "UHDTV TEST 8K VIDEO.mp4"
            },
            u"params": {
                u"youtube_include_dash_manifest": True,
                u"format": "141",
            },
        },
        # DASH manifest with encrypted signature
        {
            u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA',
            u'info_dict': {
                u'id': u'IB3lcPjvWLA',
                u'ext': u'm4a',
                u'title': u'Afrojack - The Spark ft. Spree Wilson',
                u'description': u'md5:3199ed45ee8836572865580804d7ac0f',
                u'uploader': u'AfrojackVEVO',
                u'uploader_id': u'AfrojackVEVO',
                u'upload_date': u'20131011',
            },
            u"params": {
                u'youtube_include_dash_manifest': True,
                u'format': '141',
            },
        },
    ]
317
318
319 @classmethod
320 def suitable(cls, url):
321 """Receives a URL and returns True if suitable for this IE."""
322 if YoutubePlaylistIE.suitable(url): return False
323 return re.match(cls._VALID_URL, url) is not None
324
    def __init__(self, *args, **kwargs):
        """Initialize the extractor and its per-player signature cache."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Maps (player_url, signature_length) -> deciphering function so each
        # player script is downloaded and parsed at most once.
        self._player_cache = {}
328
329 def report_video_info_webpage_download(self, video_id):
330 """Report attempt to download video info webpage."""
331 self.to_screen(u'%s: Downloading video info webpage' % video_id)
332
333 def report_information_extraction(self, video_id):
334 """Report attempt to extract video information."""
335 self.to_screen(u'%s: Extracting video information' % video_id)
336
337 def report_unavailable_format(self, video_id, format):
338 """Report extracted video URL."""
339 self.to_screen(u'%s: Format %s not available' % (video_id, format))
340
    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        # Informational only; the actual RTMP handling happens downstream.
        self.to_screen(u'RTMP download detected')
344
    def _extract_signature_function(self, video_id, player_url, slen):
        """Build the signature-deciphering function for a given player.

        Downloads the JS or SWF player referenced by player_url, extracts
        its decipher routine and returns it as a Python callable taking the
        scrambled signature string.  Results are cached on disk (when a
        cache dir is configured) as the index permutation the function
        applies to a signature of length slen.
        """
        # The player URL ends in "-<id>.<ext>"; ext selects js vs swf parsing.
        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                        player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%d' % (player_type, player_id, slen)
        # func_id becomes a filename component; ensure it has no path parts.
        assert os.path.basename(func_id) == func_id
        cache_dir = get_cachedir(self._downloader.params)

        cache_enabled = cache_dir is not None
        if cache_enabled:
            cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                    u'youtube-sigfuncs',
                                    func_id + '.json')
            try:
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    cache_spec = json.load(cachef)
                # Cached spec is a list of source indices; replay it.
                return lambda s: u''.join(s[i] for i in cache_spec)
            except IOError:
                pass  # No cache available

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        if cache_enabled:
            try:
                # Derive the permutation by running the function on a string
                # of distinct characters, then persist it as JSON.
                test_string = u''.join(map(compat_chr, range(slen)))
                cache_res = res(test_string)
                cache_spec = [ord(c) for c in cache_res]
                try:
                    os.makedirs(os.path.dirname(cache_fn))
                except OSError as ose:
                    if ose.errno != errno.EEXIST:
                        raise
                write_json_file(cache_spec, cache_fn)
            except Exception:
                # Cache writing is best-effort; never fail the extraction.
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Writing cache to %r failed: %s' % (cache_fn, tb))

        return res
401
    def _print_sig_code(self, func, slen):
        """Print Python source equivalent to the extracted signature function.

        Runs func on a probe string of length slen, recovers the index
        permutation it applies, and renders it as compact slice expressions
        (for pasting into _static_decrypt_signature).
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a contiguous (step == +/-1) run as a slice literal.
                starts = u'' if start == 0 else str(start)
                ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
                steps = u'' if step == 1 else (u':%d' % step)
                return u's[%s%s%s]' % (starts, ends, steps)

            step = None
            start = '(Never used)'  # Quelch pyflakes warnings - start will be
                                    # set as soon as step is set
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    # Run ended one element ago; flush it.
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield u's[%d]' % prev
            # Flush the final element or the final run (uses the loop
            # variable i after the loop, which Python keeps in scope).
            if step is None:
                yield u's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = u''.join(map(compat_chr, range(slen)))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = u' + '.join(gen_sig_code(cache_spec))
        code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
        self.to_screen(u'Extracted signature function:\n' + code)
437
438 def _parse_sig_js(self, jscode):
439 funcname = self._search_regex(
440 r'signature=([a-zA-Z]+)', jscode,
441 u'Initial JS player signature function name')
442
443 jsi = JSInterpreter(jscode)
444 initial_function = jsi.extract_function(funcname)
445 return lambda s: initial_function([s])
446
    def _parse_sig_swf(self, file_contents):
        """Extract the 'decipher' routine from a compressed SWF player.

        Parses the ABC (ActionScript ByteCode) block of the SWF, locates the
        SignatureDecipher class, and returns a Python callable that
        interprets its 'decipher' method on a signature string.  Only the
        small opcode subset actually used by YouTube's player is implemented.
        """
        # SWF header is 'FWS' (uncompressed) or 'CWS' (zlib-compressed);
        # only the compressed variant is handled here.
        if file_contents[1:3] != b'WS':
            raise ExtractorError(
                u'Not an SWF file; header is %r' % file_contents[:3])
        if file_contents[:1] == b'C':
            content = zlib.decompress(file_contents[8:])
        else:
            raise NotImplementedError(u'Unsupported compression format %r' %
                                      file_contents[:1])

        def extract_tags(content):
            # Yield (tag_code, tag_body) for every SWF tag; short tags pack
            # the length into the 16-bit header, long tags use an extra u32.
            pos = 0
            while pos < len(content):
                header16 = struct.unpack('<H', content[pos:pos+2])[0]
                pos += 2
                tag_code = header16 >> 6
                tag_len = header16 & 0x3f
                if tag_len == 0x3f:
                    tag_len = struct.unpack('<I', content[pos:pos+4])[0]
                    pos += 4
                assert pos+tag_len <= len(content)
                yield (tag_code, content[pos:pos+tag_len])
                pos += tag_len

        # Tag 82 is DoABC; skip its flags + NUL-terminated name to reach the
        # raw ABC data.
        code_tag = next(tag
                        for tag_code, tag in extract_tags(content)
                        if tag_code == 82)
        p = code_tag.index(b'\0', 4) + 1
        code_reader = io.BytesIO(code_tag[p:])

        # Parse ABC (AVM2 ByteCode)
        def read_int(reader=None):
            # Variable-length u32: 7 data bits per byte, high bit = continue.
            if reader is None:
                reader = code_reader
            res = 0
            shift = 0
            for _ in range(5):
                buf = reader.read(1)
                assert len(buf) == 1
                b = struct.unpack('<B', buf)[0]
                res = res | ((b & 0x7f) << shift)
                if b & 0x80 == 0:
                    break
                shift += 7
            return res

        def u30(reader=None):
            # u30 is a u32 restricted to 30 bits.
            res = read_int(reader)
            assert res & 0xf0000000 == 0
            return res
        u32 = read_int

        def s32(reader=None):
            # Signed 32-bit variant (two's complement of the u32 encoding).
            v = read_int(reader)
            if v & 0x80000000 != 0:
                v = - ((v ^ 0xffffffff) + 1)
            return v

        def read_string(reader=None):
            # Length-prefixed UTF-8 string.
            if reader is None:
                reader = code_reader
            slen = u30(reader)
            resb = reader.read(slen)
            assert len(resb) == slen
            return resb.decode('utf-8')

        def read_bytes(count, reader=None):
            if reader is None:
                reader = code_reader
            resb = reader.read(count)
            assert len(resb) == count
            return resb

        def read_byte(reader=None):
            resb = read_bytes(1, reader=reader)
            res = struct.unpack('<B', resb)[0]
            return res

        # minor_version + major_version
        read_bytes(2 + 2)

        # Constant pool (index 0 of each pool is implicit, hence range(1, n))
        int_count = u30()
        for _c in range(1, int_count):
            s32()
        uint_count = u30()
        for _c in range(1, uint_count):
            u32()
        double_count = u30()
        read_bytes((double_count-1) * 8)
        string_count = u30()
        constant_strings = [u'']
        for _c in range(1, string_count):
            s = read_string()
            constant_strings.append(s)
        namespace_count = u30()
        for _c in range(1, namespace_count):
            read_bytes(1)  # kind
            u30()  # name
        ns_set_count = u30()
        for _c in range(1, ns_set_count):
            count = u30()
            for _c2 in range(count):
                u30()
        multiname_count = u30()
        # Number of u30 fields following each multiname kind byte.
        MULTINAME_SIZES = {
            0x07: 2,  # QName
            0x0d: 2,  # QNameA
            0x0f: 1,  # RTQName
            0x10: 1,  # RTQNameA
            0x11: 0,  # RTQNameL
            0x12: 0,  # RTQNameLA
            0x09: 2,  # Multiname
            0x0e: 2,  # MultinameA
            0x1b: 1,  # MultinameL
            0x1c: 1,  # MultinameLA
        }
        multinames = [u'']
        for _c in range(1, multiname_count):
            kind = u30()
            assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
            if kind == 0x07:
                # Only QNames resolve to a plain string name; that is all we
                # need to find methods by name later.
                u30()  # namespace_idx
                name_idx = u30()
                multinames.append(constant_strings[name_idx])
            else:
                multinames.append('[MULTINAME kind: %d]' % kind)
                for _c2 in range(MULTINAME_SIZES[kind]):
                    u30()

        # Methods
        method_count = u30()
        MethodInfo = collections.namedtuple(
            'MethodInfo',
            ['NEED_ARGUMENTS', 'NEED_REST'])
        method_infos = []
        for method_id in range(method_count):
            param_count = u30()
            u30()  # return type
            for _ in range(param_count):
                u30()  # param type
            u30()  # name index (always 0 for youtube)
            flags = read_byte()
            if flags & 0x08 != 0:
                # Options present
                option_count = u30()
                for c in range(option_count):
                    u30()  # val
                    read_bytes(1)  # kind
            if flags & 0x80 != 0:
                # Param names present
                for _ in range(param_count):
                    u30()  # param name
            mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
            method_infos.append(mi)

        # Metadata
        metadata_count = u30()
        for _c in range(metadata_count):
            u30()  # name
            item_count = u30()
            for _c2 in range(item_count):
                u30()  # key
                u30()  # value

        def parse_traits_info():
            # Consume one trait record; return {method name: method index}
            # for Method/Getter/Setter traits, an empty dict otherwise.
            trait_name_idx = u30()
            kind_full = read_byte()
            kind = kind_full & 0x0f
            attrs = kind_full >> 4
            methods = {}
            if kind in [0x00, 0x06]:  # Slot or Const
                u30()  # Slot id
                u30()  # type_name_idx
                vindex = u30()
                if vindex != 0:
                    read_byte()  # vkind
            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
                u30()  # disp_id
                method_idx = u30()
                methods[multinames[trait_name_idx]] = method_idx
            elif kind == 0x04:  # Class
                u30()  # slot_id
                u30()  # classi
            elif kind == 0x05:  # Function
                u30()  # slot_id
                function_idx = u30()
                methods[function_idx] = multinames[trait_name_idx]
            else:
                raise ExtractorError(u'Unsupported trait kind %d' % kind)

            if attrs & 0x4 != 0:  # Metadata present
                metadata_count = u30()
                for _c3 in range(metadata_count):
                    u30()  # metadata index

            return methods

        # Classes
        TARGET_CLASSNAME = u'SignatureDecipher'
        searched_idx = multinames.index(TARGET_CLASSNAME)
        searched_class_id = None
        class_count = u30()
        for class_id in range(class_count):
            name_idx = u30()
            if name_idx == searched_idx:
                # We found the class we're looking for!
                searched_class_id = class_id
            u30()  # super_name idx
            flags = read_byte()
            if flags & 0x08 != 0:  # Protected namespace is present
                u30()  # protected_ns_idx
            intrf_count = u30()
            for _c2 in range(intrf_count):
                u30()
            u30()  # iinit
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        if searched_class_id is None:
            raise ExtractorError(u'Target class %r not found' %
                                 TARGET_CLASSNAME)

        # Second pass over the class table: collect the method name<->index
        # mappings of the target class only.
        method_names = {}
        method_idxs = {}
        for class_id in range(class_count):
            u30()  # cinit
            trait_count = u30()
            for _c2 in range(trait_count):
                trait_methods = parse_traits_info()
                if class_id == searched_class_id:
                    method_names.update(trait_methods.items())
                    method_idxs.update(dict(
                        (idx, name)
                        for name, idx in trait_methods.items()))

        # Scripts
        script_count = u30()
        for _c in range(script_count):
            u30()  # init
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        # Method bodies
        method_body_count = u30()
        Method = collections.namedtuple('Method', ['code', 'local_count'])
        methods = {}
        for _c in range(method_body_count):
            method_idx = u30()
            u30()  # max_stack
            local_count = u30()
            u30()  # init_scope_depth
            u30()  # max_scope_depth
            code_length = u30()
            code = read_bytes(code_length)
            if method_idx in method_idxs:
                # Keep the bytecode of the target class's methods, by name.
                m = Method(code, local_count)
                methods[method_idxs[method_idx]] = m
            exception_count = u30()
            for _c2 in range(exception_count):
                u30()  # from
                u30()  # to
                u30()  # target
                u30()  # exc_type
                u30()  # var_name
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        # Sanity: the whole DoABC body must have been consumed and every
        # wanted method must have a body.
        assert p + code_reader.tell() == len(code_tag)
        assert len(methods) == len(method_idxs)

        method_pyfunctions = {}

        def extract_function(func_name):
            # Return (and memoize) a Python callable interpreting the named
            # AVM2 method.
            if func_name in method_pyfunctions:
                return method_pyfunctions[func_name]
            if func_name not in methods:
                raise ExtractorError(u'Cannot find function %r' % func_name)
            m = methods[func_name]

            def resfunc(args):
                # Minimal AVM2 interpreter: registers + operand stack.
                registers = ['(this)'] + list(args) + [None] * m.local_count
                stack = []
                coder = io.BytesIO(m.code)
                while True:
                    opcode = struct.unpack('!B', coder.read(1))[0]
                    if opcode == 36:  # pushbyte
                        v = struct.unpack('!B', coder.read(1))[0]
                        stack.append(v)
                    elif opcode == 44:  # pushstring
                        idx = u30(coder)
                        stack.append(constant_strings[idx])
                    elif opcode == 48:  # pushscope
                        # We don't implement the scope register, so we'll just
                        # ignore the popped value
                        stack.pop()
                    elif opcode == 70:  # callproperty
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'split':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, compat_str)
                            if args[0] == u'':
                                res = list(obj)
                            else:
                                res = obj.split(args[0])
                            stack.append(res)
                        elif mname == u'slice':
                            assert len(args) == 1
                            assert isinstance(args[0], int)
                            assert isinstance(obj, list)
                            res = obj[args[0]:]
                            stack.append(res)
                        elif mname == u'join':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, list)
                            res = args[0].join(obj)
                            stack.append(res)
                        elif mname in method_pyfunctions:
                            stack.append(method_pyfunctions[mname](args))
                        else:
                            raise NotImplementedError(
                                u'Unsupported property %r on %r'
                                % (mname, obj))
                    elif opcode == 72:  # returnvalue
                        res = stack.pop()
                        return res
                    elif opcode == 79:  # callpropvoid
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'reverse':
                            assert isinstance(obj, list)
                            obj.reverse()
                        else:
                            raise NotImplementedError(
                                u'Unsupported (void) property %r on %r'
                                % (mname, obj))
                    elif opcode == 93:  # findpropstrict
                        index = u30(coder)
                        mname = multinames[index]
                        res = extract_function(mname)
                        stack.append(res)
                    elif opcode == 97:  # setproperty
                        index = u30(coder)
                        value = stack.pop()
                        idx = stack.pop()
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        assert isinstance(idx, int)
                        obj[idx] = value
                    elif opcode == 98:  # getlocal
                        index = u30(coder)
                        stack.append(registers[index])
                    elif opcode == 99:  # setlocal
                        index = u30(coder)
                        value = stack.pop()
                        registers[index] = value
                    elif opcode == 102:  # getproperty
                        index = u30(coder)
                        pname = multinames[index]
                        if pname == u'length':
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(len(obj))
                        else:  # Assume attribute access
                            idx = stack.pop()
                            assert isinstance(idx, int)
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(obj[idx])
                    elif opcode == 128:  # coerce
                        u30(coder)
                    elif opcode == 133:  # coerce_s
                        assert isinstance(stack[-1], (type(None), compat_str))
                    elif opcode == 164:  # modulo
                        value2 = stack.pop()
                        value1 = stack.pop()
                        res = value1 % value2
                        stack.append(res)
                    elif opcode == 208:  # getlocal_0
                        stack.append(registers[0])
                    elif opcode == 209:  # getlocal_1
                        stack.append(registers[1])
                    elif opcode == 210:  # getlocal_2
                        stack.append(registers[2])
                    elif opcode == 211:  # getlocal_3
                        stack.append(registers[3])
                    elif opcode == 214:  # setlocal_2
                        registers[2] = stack.pop()
                    elif opcode == 215:  # setlocal_3
                        registers[3] = stack.pop()
                    else:
                        raise NotImplementedError(
                            u'Unsupported opcode %d' % opcode)

            method_pyfunctions[func_name] = resfunc
            return resfunc

        initial_function = extract_function(u'decipher')
        return lambda s: initial_function([s])
860
861 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
862 """Turn the encrypted s field into a working signature"""
863
864 if player_url is not None:
865 if player_url.startswith(u'//'):
866 player_url = u'https:' + player_url
867 try:
868 player_id = (player_url, len(s))
869 if player_id not in self._player_cache:
870 func = self._extract_signature_function(
871 video_id, player_url, len(s)
872 )
873 self._player_cache[player_id] = func
874 func = self._player_cache[player_id]
875 if self._downloader.params.get('youtube_print_sig_code'):
876 self._print_sig_code(func, len(s))
877 return func(s)
878 except Exception:
879 tb = traceback.format_exc()
880 self._downloader.report_warning(
881 u'Automatic signature extraction failed: ' + tb)
882
883 self._downloader.report_warning(
884 u'Warning: Falling back to static signature algorithm')
885
886 return self._static_decrypt_signature(
887 s, video_id, player_url, age_gate)
888
889 def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
890 if age_gate:
891 # The videos with age protection use another player, so the
892 # algorithms can be different.
893 if len(s) == 86:
894 return s[2:63] + s[82] + s[64:82] + s[63]
895
896 if len(s) == 93:
897 return s[86:29:-1] + s[88] + s[28:5:-1]
898 elif len(s) == 92:
899 return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
900 elif len(s) == 91:
901 return s[84:27:-1] + s[86] + s[26:5:-1]
902 elif len(s) == 90:
903 return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
904 elif len(s) == 89:
905 return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
906 elif len(s) == 88:
907 return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
908 elif len(s) == 87:
909 return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
910 elif len(s) == 86:
911 return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
912 elif len(s) == 85:
913 return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
914 elif len(s) == 84:
915 return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
916 elif len(s) == 83:
917 return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
918 elif len(s) == 82:
919 return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
920 elif len(s) == 81:
921 return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
922 elif len(s) == 80:
923 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
924 elif len(s) == 79:
925 return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
926
927 else:
928 raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
929
930 def _get_available_subtitles(self, video_id, webpage):
931 try:
932 sub_list = self._download_webpage(
933 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
934 video_id, note=False)
935 except ExtractorError as err:
936 self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
937 return {}
938 lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
939
940 sub_lang_list = {}
941 for l in lang_list:
942 lang = l[1]
943 params = compat_urllib_parse.urlencode({
944 'lang': lang,
945 'v': video_id,
946 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
947 'name': unescapeHTML(l[0]).encode('utf-8'),
948 })
949 url = u'https://www.youtube.com/api/timedtext?' + params
950 sub_lang_list[lang] = url
951 if not sub_lang_list:
952 self._downloader.report_warning(u'video doesn\'t have subtitles')
953 return {}
954 return sub_lang_list
955
956 def _get_available_automatic_caption(self, video_id, webpage):
957 """We need the webpage for getting the captions url, pass it as an
958 argument to speed up the process."""
959 sub_format = self._downloader.params.get('subtitlesformat', 'srt')
960 self.to_screen(u'%s: Looking for automatic captions' % video_id)
961 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
962 err_msg = u'Couldn\'t find automatic captions for %s' % video_id
963 if mobj is None:
964 self._downloader.report_warning(err_msg)
965 return {}
966 player_config = json.loads(mobj.group(1))
967 try:
968 args = player_config[u'args']
969 caption_url = args[u'ttsurl']
970 timestamp = args[u'timestamp']
971 # We get the available subtitles
972 list_params = compat_urllib_parse.urlencode({
973 'type': 'list',
974 'tlangs': 1,
975 'asrs': 1,
976 })
977 list_url = caption_url + '&' + list_params
978 caption_list = self._download_xml(list_url, video_id)
979 original_lang_node = caption_list.find('track')
980 if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
981 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
982 return {}
983 original_lang = original_lang_node.attrib['lang_code']
984
985 sub_lang_list = {}
986 for lang_node in caption_list.findall('target'):
987 sub_lang = lang_node.attrib['lang_code']
988 params = compat_urllib_parse.urlencode({
989 'lang': original_lang,
990 'tlang': sub_lang,
991 'fmt': sub_format,
992 'ts': timestamp,
993 'kind': 'asr',
994 })
995 sub_lang_list[sub_lang] = caption_url + '&' + params
996 return sub_lang_list
997 # An extractor error can be raise by the download process if there are
998 # no automatic captions but there are subtitles
999 except (KeyError, ExtractorError):
1000 self._downloader.report_warning(err_msg)
1001 return {}
1002
1003 @classmethod
1004 def extract_id(cls, url):
1005 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1006 if mobj is None:
1007 raise ExtractorError(u'Invalid URL: %s' % url)
1008 video_id = mobj.group(2)
1009 return video_id
1010
1011 def _extract_from_m3u8(self, manifest_url, video_id):
1012 url_map = {}
1013 def _get_urls(_manifest):
1014 lines = _manifest.split('\n')
1015 urls = filter(lambda l: l and not l.startswith('#'),
1016 lines)
1017 return urls
1018 manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1019 formats_urls = _get_urls(manifest)
1020 for format_url in formats_urls:
1021 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1022 url_map[itag] = format_url
1023 return url_map
1024
1025 def _extract_annotations(self, video_id):
1026 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1027 return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
1028
    def _real_extract(self, url):
        """Extract metadata and downloadable formats for a single video.

        Flow: resolve redirecting URLs -> download watch page -> download
        get_video_info (with an age-gate bypass variant when needed) ->
        scrape metadata from page/info -> build the formats list from rtmp /
        url_encoded_fmt_stream_map / HLS, decrypting signatures on the way ->
        optionally merge in DASH manifest formats.
        """
        proto = (
            u'http' if self._downloader.params.get('prefer_insecure', False)
            else u'https')

        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
            url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self.extract_id(url)

        # Get video webpage
        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        video_webpage = self._download_webpage(url, video_id)

        # Attempt to extract SWF player URL (needed later for signature decryption)
        mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        if mobj is not None:
            # The URL appears JS-escaped in the page; strip the backslashes.
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
        else:
            player_url = None

        # Get video info
        self.report_video_info_webpage_download(video_id)
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            self.report_age_confirmation()
            age_gate = True
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            data = compat_urllib_parse.urlencode({
                'video_id': video_id,
                'el': 'player_embedded',
                'gl': 'US',
                'hl': 'en',
                'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                'asv': 3,
                'sts': '1588',  # NOTE(review): magic player timestamp — meaning not derivable from here
            })
            video_info_url = proto + '://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(
                video_info_url, video_id,
                note=False,
                errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
        else:
            age_gate = False
            # Try several 'el' variants until one returns a usable response.
            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                                  % (video_id, el_type))
                video_info_webpage = self._download_webpage(
                    video_info_url, video_id,
                    note=False,
                    errnote='unable to download video info webpage')
                video_info = compat_parse_qs(video_info_webpage)
                if 'token' in video_info:
                    break
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
            else:
                raise ExtractorError(u'"token" parameter not in video info for unknown reason')

        if 'view_count' in video_info:
            view_count = int(video_info['view_count'][0])
        else:
            view_count = None

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError(u'"rental" videos not supported')

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            raise ExtractorError(u'Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

        # uploader_id
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
        if mobj is not None:
            video_uploader_id = mobj.group(1)
        else:
            self._downloader.report_warning(u'unable to extract uploader nickname')

        # title
        if 'title' in video_info:
            video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
        else:
            self._downloader.report_warning(u'Unable to extract video title')
            video_title = u'_'

        # thumbnail image
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning(u'unable to extract video thumbnail')
            video_thumbnail = None
        else:   # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

        # upload date
        upload_date = None
        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
        if mobj is not None:
            # Normalize separators/whitespace before parsing.
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
        upload_date = unified_strdate(upload_date)

        # description
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            # Replace redirect links with their title text.
            video_description = re.sub(r'''(?x)
                <a\s+
                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
                    title="([^"]+)"\s+
                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
                    class="yt-uix-redirect-link"\s*>
                [^<]+
                </a>
            ''', r'\1', video_description)
            video_description = clean_html(video_description)
        else:
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
            if fd_mobj:
                video_description = unescapeHTML(fd_mobj.group(1))
            else:
                video_description = u''

        def _extract_count(klass):
            # Pull an integer counter (e.g. likes) out of the page markup.
            count = self._search_regex(
                r'class="%s">([\d,]+)</span>' % re.escape(klass),
                video_webpage, klass, default=None)
            if count is not None:
                return int(count.replace(',', ''))
            return None
        like_count = _extract_count(u'likes-count')
        dislike_count = _extract_count(u'dislikes-count')

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, video_webpage)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, video_webpage)
            return

        if 'length_seconds' not in video_info:
            self._downloader.report_warning(u'unable to extract video duration')
            video_duration = None
        else:
            video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))

        # annotations
        video_annotations = None
        if self._downloader.params.get('writeannotations', False):
            video_annotations = self._extract_annotations(video_id)

        # Decide which formats to download
        try:
            mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
            if not mobj:
                raise ValueError('Could not find vevo ID')
            json_code = uppercase_escape(mobj.group(1))
            ytplayer_config = json.loads(json_code)
            args = ytplayer_config['args']
            # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
            # this signatures are encrypted
            if 'url_encoded_fmt_stream_map' not in args:
                raise ValueError(u'No stream_map present')  # caught below
            re_signature = re.compile(r'[&,]s=')
            m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
            if m_s is not None:
                self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
            m_s = re_signature.search(args.get('adaptive_fmts', u''))
            if m_s is not None:
                if 'adaptive_fmts' in video_info:
                    video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
                else:
                    video_info['adaptive_fmts'] = [args['adaptive_fmts']]
        except ValueError:
            pass

        def _map_to_format_list(urlmap):
            # Turn {itag: url} into youtube-dl format dicts, merging in the
            # static per-itag metadata from self._formats when known.
            formats = []
            for itag, video_real_url in urlmap.items():
                dct = {
                    'format_id': itag,
                    'url': video_real_url,
                    'player_url': player_url,
                }
                if itag in self._formats:
                    dct.update(self._formats[itag])
                formats.append(dct)
            return formats

        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            formats = [{
                'format_id': '_rtmp',
                'protocol': 'rtmp',
                'url': video_info['conn'][0],
                'player_url': player_url,
            }]
        elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
            if 'rtmpe%3Dyes' in encoded_url_map:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            url_map = {}
            for url_data_str in encoded_url_map.split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' in url_data and 'url' in url_data:
                    url = url_data['url'][0]
                    if 'sig' in url_data:
                        # Signature already plain — append as-is.
                        url += '&signature=' + url_data['sig'][0]
                    elif 's' in url_data:
                        # Encrypted signature — must be decrypted before use.
                        encrypted_sig = url_data['s'][0]
                        if self._downloader.params.get('verbose'):
                            if age_gate:
                                if player_url is None:
                                    player_version = 'unknown'
                                else:
                                    player_version = self._search_regex(
                                        r'-(.+)\.swf$', player_url,
                                        u'flash player', fatal=False)
                                player_desc = 'flash player %s' % player_version
                            else:
                                player_version = self._search_regex(
                                    r'html5player-(.+?)\.js', video_webpage,
                                    'html5 player', fatal=False)
                                player_desc = u'html5 player %s' % player_version

                            parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))

                        if not age_gate:
                            jsplayer_url_json = self._search_regex(
                                r'"assets":.+?"js":\s*("[^"]+")',
                                video_webpage, u'JS player URL')
                            player_url = json.loads(jsplayer_url_json)

                        signature = self._decrypt_signature(
                            encrypted_sig, video_id, player_url, age_gate)
                        url += '&signature=' + signature
                    if 'ratebypass' not in url:
                        url += '&ratebypass=yes'
                    url_map[url_data['itag'][0]] = url
            formats = _map_to_format_list(url_map)
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            formats = _map_to_format_list(url_map)
        else:
            raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

        # Look for the DASH manifest
        if (self._downloader.params.get('youtube_include_dash_manifest', False)):
            try:
                # The DASH manifest used needs to be the one from the original video_webpage.
                # The one found in get_video_info seems to be using different signatures.
                # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
                # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
                # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
                if age_gate:
                    dash_manifest_url = video_info.get('dashmpd')[0]
                else:
                    dash_manifest_url = ytplayer_config['args']['dashmpd']

                def decrypt_sig(mobj):
                    # Decrypt an in-URL signature segment (/s/<sig>).
                    s = mobj.group(1)
                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
                    return '/signature/%s' % dec_s
                dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
                dash_doc = self._download_xml(
                    dash_manifest_url, video_id,
                    note=u'Downloading DASH manifest',
                    errnote=u'Could not download DASH manifest')
                for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
                    url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
                    if url_el is None:
                        continue
                    format_id = r.attrib['id']
                    video_url = url_el.text
                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
                    f = {
                        'format_id': format_id,
                        'url': video_url,
                        'width': int_or_none(r.attrib.get('width')),
                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
                        'filesize': filesize,
                    }
                    try:
                        # Prefer enriching an existing entry with the same itag.
                        existing_format = next(
                            fo for fo in formats
                            if fo['format_id'] == format_id)
                    except StopIteration:
                        f.update(self._formats.get(format_id, {}))
                        formats.append(f)
                    else:
                        existing_format.update(f)

            except (ExtractorError, KeyError) as e:
                self.report_warning(u'Skipping DASH manifest: %s' % e, video_id)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'uploader': video_uploader,
            'uploader_id': video_uploader_id,
            'upload_date': upload_date,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'subtitles': video_subtitles,
            'duration': video_duration,
            'age_limit': 18 if age_gate else 0,
            'annotations': video_annotations,
            'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'formats': formats,
        }
1355
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = u'youtube:playlist'

    def _real_initialize(self):
        self._login()

    def _ids_to_results(self, ids):
        """Wrap a list of video ids into Youtube url_result dicts."""
        return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
                for vid_id in ids]

    def _extract_mix(self, playlist_id):
        """Extract an auto-generated mix playlist.

        The mixes are generated from a single video; the id of the playlist
        is just 'RD' + video_id, so there is no regular playlist page.
        """
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (search_title('playlist-title') or
            search_title('title long-title') or search_title('title'))
        title = clean_html(title_span)
        video_re = r'''(?x)data-video-username="(.*?)".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
        matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
        # Some of the videos may have been deleted, their username field is empty
        ids = [video_id for (username, video_id) in matches if username]
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                # Fixed: the message used to hard-code a 'PL' prefix, but
                # playlist_id already carries its own prefix (PL/EC/UU/FL/RD),
                # which printed ids like "PLPLxxxx".
                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)
        if playlist_id.startswith('TL'):
            raise ExtractorError(u'For downloading YouTube.com top lists, use '
                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)

        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)
        more_widget_html = content_html = page

        # Extract the video ids from the playlist pages
        ids = []

        for page_num in itertools.count(1):
            matches = re.finditer(self._VIDEO_RE, content_html)
            # We remove the duplicates and the link with index 0
            # (it's not the first video of the playlist)
            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        playlist_title = self._html_search_regex(
            r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)
1458
1459
class YoutubeTopListIE(YoutubePlaylistIE):
    IE_NAME = u'youtube:toplist'
    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
        u' (Example: "yttoplist:music:Top Tracks")')
    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        channel = mobj.group('chann')
        title = mobj.group('title')
        # Locate the playlist link on the channel page by its title query.
        query = compat_urllib_parse.urlencode({'title': title})
        playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query)
        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
        link = self._html_search_regex(playlist_re, channel_page, u'list')
        url = compat_urlparse.urljoin('https://www.youtube.com/', link)

        # The page sometimes comes back without the videos; keep retrying
        # until they show up.
        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
        ids = []
        for attempt in itertools.count(0):
            msg = u'Downloading Youtube mix'
            if attempt > 0:
                msg += ', retry #%d' % attempt
            webpage = self._download_webpage(url, title, msg)
            ids = orderedSet(re.findall(video_re, webpage))
            if ids:
                break
        return self.playlist_result(self._ids_to_results(ids), playlist_title=title)
1490
1491
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the watch-page video ids linked from page, deduplicated
        in first-seen order."""
        ids_in_page = []
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            vid = mobj.group(1)
            if vid not in ids_in_page:
                ids_in_page.append(vid)
        return ids_in_page

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        channel_id = mobj.group(1)
        channel_page = self._download_webpage(
            'https://www.youtube.com/channel/%s/videos' % channel_id, channel_id)
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # Autogenerated channels list every video on a single page;
            # their ajax pages come back empty.
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Page through the json-based channel_ajax listing.
            video_ids = []
            for pagenum in itertools.count(1):
                page = self._download_json(
                    self._MORE_PAGES_URL % (pagenum, channel_id), channel_id,
                    note=u'Downloading page #%s' % pagenum,
                    transform_source=uppercase_escape)
                video_ids.extend(self.extract_videos_from_page(page['content_html']))
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
                       for video_id in video_ids]
        return self.playlist_result(url_entries, channel_id)
1546
1547
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Our regex is too permissive, so defer to any other youtube
        # extractor that also matches this URL.
        other_ies = iter(klass for (name, klass) in globals().items()
                         if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        username = mobj.group(1)

        # The GData API caps each response at _GDATA_PAGE_SIZE videos, so we
        # fetch page by page (lazily, via PagedList) until a page is empty.
        def download_page(pagenum):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(
                gdata_url, username,
                u'Downloading video ids from %d to %d' % (
                    start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                return

            for entry in response['feed']['entry']:
                video_id = entry['id']['$t'].split('/')[-1]
                yield {
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    'id': video_id,
                    'title': entry['title']['$t'],
                }

        url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)
        return self.playlist_result(url_results, playlist_title=username)
1608
1609
class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        PAGE_SIZE = 50
        video_ids = []
        limit = n
        pagenum = 0

        while PAGE_SIZE * pagenum < limit:
            result_url = self._API_URL % (
                compat_urllib_parse.quote_plus(query.encode('utf-8')),
                PAGE_SIZE * pagenum + 1)
            data_json = self._download_webpage(
                result_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (pagenum + 1),
                errnote=u'Unable to download API page')
            api_response = json.loads(data_json)['data']

            if 'items' not in api_response:
                raise ExtractorError(
                    u'[youtube] No video results', expected=True)

            video_ids.extend(video['id'] for video in api_response['items'])

            # The API may report fewer total results than requested.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        # Trim any overshoot from the final page.
        del video_ids[n:]
        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
1651
1652
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = u'YouTube.com searches, newest videos first'
    # Same gdata API as the parent, ordered by publication date.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
1658
1659
class YoutubeSearchURLIE(InfoExtractor):
    IE_DESC = u'YouTube.com search URLs'
    IE_NAME = u'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse.unquote_plus(mobj.group('query'))

        webpage = self._download_webpage(url, query)
        result_code = self._search_regex(
            r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')

        entries = []
        # Each result is wrapped in an <h3 class="yt-lockup-title"> element.
        for part_code in re.findall(
                r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code):
            part_title = self._html_search_regex(
                r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False)
            part_url_snippet = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            part_url = compat_urlparse.urljoin(
                'https://www.youtube.com/', part_url_snippet)
            entries.append({
                '_type': 'url',
                'url': part_url,
                'title': part_title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
1694
1695
class YoutubeShowIE(InfoExtractor):
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
        return [
            self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
            for season in m_seasons]
1709
1710
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        paging = 0
        for page_num in itertools.count(1):
            info = json.loads(self._download_webpage(
                self._FEED_TEMPLATE % paging,
                u'%s feed' % self._FEED_NAME,
                u'Downloading page %s' % page_num))
            feed_html = info['feed_html']
            matches = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            for video_id in orderedSet(m.group(1) for m in matches):
                feed_entries.append(
                    self.url_result(video_id, 'Youtube', video_id=video_id))
            # A null 'paging' token marks the last page.
            if info['paging'] is None:
                break
            paging = info['paging']
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1753
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    # Matches both the feed URL and the :ytsubs / :ytsubscriptions keywords.
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1759
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Recommended-videos feed; the ':ytrec' / ':ytrecommended'
    # pseudo-URLs also match _VALID_URL.
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    # feed_name passed to the feed_ajax endpoint by the base class.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
1765
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    # Watch-later list; the ':ytwatchlater' pseudo-URL also matches.
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    # feed_name passed to the feed_ajax endpoint by the base class.
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Watch-later is per-user, so the base class must use the
    # action_load_personal_feed AJAX action.
    _PERSONAL_FEED = True
1772
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Watch-history feed; the ':ythistory' pseudo-URL also matches.
    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Fix: use a raw string for the regex (was u'...'), since the pattern
    # contains backslash escapes; this matches every sibling extractor's
    # _VALID_URL convention and yields the identical pattern.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    # feed_name passed to the feed_ajax endpoint by the base class.
    _FEED_NAME = 'history'
    # History is per-user, so use the action_load_personal_feed action.
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'
1779
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourite videos playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds a link to the backing playlist;
        # scrape its id and delegate to the playlist extractor.
        page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_playlist = self._search_regex(
            r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(favourites_playlist, 'YoutubePlaylist')
1790
1791
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch/attribution URLs that lost their video id."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$|
        (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
    '''

    def _real_extract(self, url):
        # Matching here means the v= parameter is missing, which almost
        # always happens when an unquoted '&' truncated the URL in the
        # shell — so tell the user how to fix their invocation.
        message = (
            u'Did you forget to quote the URL? Remember that & is a meta '
            u'character in most shells, so you want to put the URL in quotes, '
            u'like youtube-dl '
            u'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            u' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(message, expected=True)