# coding: utf-8

from __future__ import unicode_literals


import itertools
import json
import os.path
import re
import time
import traceback

from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter
from ..compat import (
    compat_chr,
    compat_parse_qs,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
    compat_str,
)
from ..utils import (
    clean_html,
    ExtractorError,
    get_element_by_attribute,
    get_element_by_id,
    int_or_none,
    OnDemandPagedList,
    orderedSet,
    unescapeHTML,
    unified_strdate,
    uppercase_escape,
)


class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, 'Login GALX parameter')

        # Log in
        login_form_strs = {
            'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            'Email': username,
            'GALX': galx,
            'Passwd': password,

            'PersistentCookie': 'yes',
            '_utf8': '霱',
            'bgresponse': 'js_disabled',
            'checkConnection': '',
            'checkedDomains': 'youtube',
            'dnConn': '',
            'pstMsg': '0',
            'rmShown': '1',
            'secTok': '',
            'signIn': 'Sign in',
            'timeStmp': '',
            'service': 'youtube',
            'uilel': '3',
            'hl': 'en_US',
        }

        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')

        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note='Logging in', errnote='unable to log in', fatal=False)
        if login_results is False:
            return False

        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

        # Two-Factor
        # TODO add SMS and phone call support - these require making a request and then prompting the user

        if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
            tfa_code = self._get_tfa_info()

            if tfa_code is None:
                self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
                self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            # Unlike the first login form, secTok and timeStmp are both required for the TFA form

            match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
            if match is None:
                self._downloader.report_warning('Failed to get secTok - did the page structure change?')
                return False
            secTok = match.group(1)
            match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
            if match is None:
                self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
                return False
            timeStmp = match.group(1)

            tfa_form_strs = {
                'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                'smsToken': '',
                'smsUserPin': tfa_code,
                'smsVerifyPin': 'Verify',

                'PersistentCookie': 'yes',
                'checkConnection': '',
                'checkedDomains': 'youtube',
                'pstMsg': '1',
                'secTok': secTok,
                'timeStmp': timeStmp,
                'service': 'youtube',
                'hl': 'en_US',
            }
            tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
            tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')

            tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                tfa_req, None,
                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)

            if tfa_results is False:
                return False

            if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
                self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
                return False
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
                self._downloader.report_warning('unable to log in - did the page structure change?')
                return False
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                return False

        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_initialize(self):
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return


class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    IE_DESC = 'YouTube.com'
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                            (?:www\.)?deturl\.com/www\.youtube\.com/|
                            (?:www\.)?pwnyoutube\.com/|
                            (?:www\.)?yourepeat\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?!.*?&list=)                                            # combined list/video URLs are handled by the playlist IE
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240},
        '6': {'ext': 'flv', 'width': 450, 'height': 270},
        '13': {'ext': '3gp'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
        '34': {'ext': 'flv', 'width': 640, 'height': 360},
        '35': {'ext': 'flv', 'width': 854, 'height': 480},
        '36': {'ext': '3gp', 'width': 320, 'height': 240},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
        '43': {'ext': 'webm', 'width': 640, 'height': 360},
        '44': {'ext': 'webm', 'width': 854, 'height': 480},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080},


        # 3d videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},

        # Apple HTTP Live Streaming
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'VP9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},

        # Dash webm audio
        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
        '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
        '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
        '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},
    }

    IE_NAME = 'youtube'
    _TESTS = [
        {
            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'upload_date': '20121002',
                'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'like_count': int,
                'dislike_count': int,
            }
        },
        {
            'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
            'note': 'Test generic use_cipher_signature video (#897)',
            'info_dict': {
                'id': 'UxxajLWwzqY',
                'ext': 'mp4',
                'upload_date': '20120506',
                'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
                'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
                'uploader': 'Icona Pop',
                'uploader_id': 'IconaPop',
            }
        },
        {
            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
            'note': 'Test VEVO video with age protection (#956)',
            'info_dict': {
                'id': '07FYdnEawAQ',
                'ext': 'mp4',
                'upload_date': '20130703',
                'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
                'description': 'md5:64249768eec3bc4276236606ea996373',
                'uploader': 'justintimberlakeVEVO',
                'uploader_id': 'justintimberlakeVEVO',
            }
        },
        {
            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
            'note': 'Embed-only video (#1746)',
            'info_dict': {
                'id': 'yZIXLfi8CZQ',
                'ext': 'mp4',
                'upload_date': '20120608',
                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
                'uploader': 'SET India',
                'uploader_id': 'setindia'
            }
        },
        {
            'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
            'note': '256k DASH audio (format 141) via DASH manifest',
            'info_dict': {
                'id': 'a9LDPn-MO4I',
                'ext': 'm4a',
                'upload_date': '20121002',
                'uploader_id': '8KVIDEO',
                'description': '',
                'uploader': '8KVIDEO',
                'title': 'UHDTV TEST 8K VIDEO.mp4'
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141',
            },
        },
        # DASH manifest with encrypted signature
        {
            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
            'info_dict': {
                'id': 'IB3lcPjvWLA',
                'ext': 'm4a',
                'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
                'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
                'uploader': 'AfrojackVEVO',
                'uploader_id': 'AfrojackVEVO',
                'upload_date': '20131011',
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141',
            },
        },
        # Controversy video
        {
            'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
            'info_dict': {
                'id': 'T4XJQO3qol8',
                'ext': 'mp4',
                'upload_date': '20100909',
                'uploader': 'The Amazing Atheist',
                'uploader_id': 'TheAmazingAtheist',
                'title': 'Burning Everyone\'s Koran',
                'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
            }
        },
        # Normal age-gate video (No vevo, embed allowed)
        {
            'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
            'info_dict': {
                'id': 'HtVdAasjOgU',
                'ext': 'mp4',
                'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
                'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                'uploader': 'The Witcher',
                'uploader_id': 'WitcherGame',
                'upload_date': '20140605',
            },
        },
        # Age-gate video with encrypted signature
        {
            'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
            'info_dict': {
                'id': '6kLq3WMV1nU',
                'ext': 'mp4',
                'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
                'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
                'uploader': 'LloydVEVO',
                'uploader_id': 'LloydVEVO',
                'upload_date': '20110629',
            },
        },
        # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
        {
            'url': '__2ABJjxzNo',
            'info_dict': {
                'id': '__2ABJjxzNo',
                'ext': 'mp4',
                'upload_date': '20100430',
                'uploader_id': 'deadmau5',
                'description': 'md5:12c56784b8032162bb936a5f76d55360',
                'uploader': 'deadmau5',
                'title': 'Deadmau5 - Some Chords (HD)',
            },
            'expected_warnings': [
                'DASH manifest missing',
            ]
        },
        # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
        {
            'url': 'lqQg6PlCWgI',
            'info_dict': {
                'id': 'lqQg6PlCWgI',
                'ext': 'mp4',
                'upload_date': '20120731',
                'uploader_id': 'olympic',
                'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
                'uploader': 'Olympics',
                'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
            },
            'params': {
                'skip_download': 'requires avconv',
            }
        }
    ]

    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        self._player_cache = {}

    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self.to_screen('%s: Downloading video info webpage' % video_id)

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self.to_screen('%s: Extracting video information' % video_id)

    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available."""
        self.to_screen('%s: Format %s not available' % (video_id, format))

    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen('RTMP download detected')

    def _signature_cache_id(self, example_sig):
        """ Return a string representation of a signature """
        return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
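    # Illustrative example (not from the original source): a signature such as
    # 'ABCDEFGH.IJK' splits into parts of lengths 8 and 3, so the cache id is
    # '8.3'. Signatures with the same length pattern reuse the same cipher spec.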

    def _extract_signature_function(self, video_id, player_url, example_sig):
        id_m = re.match(
            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
            player_url)
        if not id_m:
            raise ExtractorError('Cannot identify player %r' % player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            return lambda s: ''.join(s[i] for i in cache_spec)

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note='Downloading %s player %s' % (player_type, player_id),
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note='Downloading %s player %s' % (player_type, player_id),
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        if cache_spec is None:
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res

    def _print_sig_code(self, func, example_sig):
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Squelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
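    # Illustrative example (not from the original source): for a cache_spec of
    # [5, 4, 3, 2, 1, 0], gen_sig_code collapses the run into the single slice
    # 's[5::-1]', i.e. the first six characters of the test string reversed.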

    def _parse_sig_js(self, jscode):
        funcname = self._search_regex(
            r'\.sig\|\|([a-zA-Z0-9]+)\(', jscode,
            'Initial JS player signature function name')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        return lambda s: initial_function([s])
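    # Illustrative example (not from the original source): given player code
    # containing 'a.sig||xy(a.s)', the regex above captures 'xy' as the name
    # of the signature-deciphering function.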

    def _parse_sig_swf(self, file_contents):
        swfi = SWFInterpreter(file_contents)
        TARGET_CLASSNAME = 'SignatureDecipher'
        searched_class = swfi.extract_class(TARGET_CLASSNAME)
        initial_function = swfi.extract_function(searched_class, 'decipher')
        return lambda s: initial_function([s])

    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
        """Turn the encrypted s field into a working signature"""

        if player_url is None:
            raise ExtractorError('Cannot decrypt signature without player_url')

        if player_url.startswith('//'):
            player_url = 'https:' + player_url
        try:
            player_id = (player_url, self._signature_cache_id(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
                    video_id, player_url, s
                )
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            if self._downloader.params.get('youtube_print_sig_code'):
                self._print_sig_code(func, s)
            return func(s)
        except Exception as e:
            tb = traceback.format_exc()
            raise ExtractorError(
                'Signature extraction failed: ' + tb, cause=e)

    def _get_available_subtitles(self, video_id, webpage):
        try:
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
            return {}

        sub_lang_list = {}
        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            if lang in sub_lang_list:
                continue
            params = compat_urllib_parse.urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
                'name': track.attrib['name'].encode('utf-8'),
            })
            url = 'https://www.youtube.com/api/timedtext?' + params
            sub_lang_list[lang] = url
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
            return {}
        return sub_lang_list

    def _get_available_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
           argument to speed up the process."""
        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
        self.to_screen('%s: Looking for automatic captions' % video_id)
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if mobj is None:
            self._downloader.report_warning(err_msg)
            return {}
        player_config = json.loads(mobj.group(1))
        try:
            args = player_config['args']
            caption_url = args['ttsurl']
            timestamp = args['timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse.urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            caption_list = self._download_xml(list_url, video_id)
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')

            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                params = compat_urllib_parse.urlencode({
                    'lang': original_lang,
                    'tlang': sub_lang,
                    'fmt': sub_format,
                    'ts': timestamp,
                    'kind': caption_kind,
                })
                sub_lang_list[sub_lang] = caption_url + '&' + params
            return sub_lang_list
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}

    @classmethod
    def extract_id(cls, url):
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        video_id = mobj.group(2)
        return video_id
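    # Example usage (video id taken from the test cases above):
    #   YoutubeIE.extract_id('https://youtu.be/BaW_jenozKc') == 'BaW_jenozKc'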

    def _extract_from_m3u8(self, manifest_url, video_id):
        url_map = {}

        def _get_urls(_manifest):
            lines = _manifest.split('\n')
            urls = filter(lambda l: l and not l.startswith('#'),
                          lines)
            return urls
        manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
        formats_urls = _get_urls(manifest)
        for format_url in formats_urls:
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url
        return url_map
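    # Illustrative example (not from the original source): a manifest line like
    # 'https://host/path/itag/92/index.m3u8' yields
    # url_map['92'] = 'https://host/path/itag/92/index.m3u8'.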

    def _extract_annotations(self, video_id):
        url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
        return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')

    def _parse_dash_manifest(
            self, video_id, dash_manifest_url, player_url, age_gate):
        def decrypt_sig(mobj):
            s = mobj.group(1)
            dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
            return '/signature/%s' % dec_s
        dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
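        # Illustrative example (not from the original source): a manifest URL
        # containing '/s/ABC123.DEF456/' is rewritten to
        # '/signature/<deciphered value>/' before the manifest is downloaded.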
        dash_doc = self._download_xml(
            dash_manifest_url, video_id,
            note='Downloading DASH manifest',
            errnote='Could not download DASH manifest')

        formats = []
        for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
            url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
            if url_el is None:
                continue
            format_id = r.attrib['id']
            video_url = url_el.text
            filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
            f = {
                'format_id': format_id,
                'url': video_url,
                'width': int_or_none(r.attrib.get('width')),
                'height': int_or_none(r.attrib.get('height')),
                'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
                'asr': int_or_none(r.attrib.get('audioSamplingRate')),
                'filesize': filesize,
                'fps': int_or_none(r.attrib.get('frameRate')),
            }
            try:
                existing_format = next(
                    fo for fo in formats
                    if fo['format_id'] == format_id)
            except StopIteration:
                f.update(self._formats.get(format_id, {}).items())
                formats.append(f)
            else:
                existing_format.update(f)
        return formats

    def _real_extract(self, url):
        proto = (
            'http' if self._downloader.params.get('prefer_insecure', False)
            else 'https')

        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
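        # (illustrative: '.../verify_age?next_url=%2Fwatch%3Fv%3DXXXXXXXXXXX'
        # is unquoted and rebuilt as '.../watch?v=XXXXXXXXXXX')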
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
            url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self.extract_id(url)

        # Get video webpage
        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
        video_webpage = self._download_webpage(url, video_id)

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        if mobj is not None:
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
        else:
            player_url = None

        # Get video info
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            age_gate = True
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without logging in to Youtube
            url = proto + '://www.youtube.com/embed/%s' % video_id
            embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
            data = compat_urllib_parse.urlencode({
                'video_id': video_id,
                'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                'sts': self._search_regex(
                    r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
            })
            video_info_url = proto + '://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(
                video_info_url, video_id,
                note='Refetching age-gated info webpage',
                errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
        else:
            age_gate = False
            try:
                # Try looking directly into the video webpage
                mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
                if not mobj:
                    raise ValueError('Could not find ytplayer.config')  # caught below
                json_code = uppercase_escape(mobj.group(1))
                ytplayer_config = json.loads(json_code)
                args = ytplayer_config['args']
                # Convert to the same format returned by compat_parse_qs
                video_info = dict((k, [v]) for k, v in args.items())
                if 'url_encoded_fmt_stream_map' not in args:
                    raise ValueError('No stream_map present')  # caught below
            except ValueError:
                # We fall back to the get_video_info pages (used by the embed page)
                self.report_video_info_webpage_download(video_id)
                for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                    video_info_url = (
                        '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                        % (proto, video_id, el_type))
                    video_info_webpage = self._download_webpage(
                        video_info_url,
                        video_id, note=False,
                        errnote='unable to download video info webpage')
                    video_info = compat_parse_qs(video_info_webpage)
                    if 'token' in video_info:
                        break
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(
                    'YouTube said: %s' % video_info['reason'][0],
                    expected=True, video_id=video_id)
            else:
                raise ExtractorError(
                    '"token" parameter not in video info for unknown reason',
                    video_id=video_id)

        if 'view_count' in video_info:
            view_count = int(video_info['view_count'][0])
        else:
            view_count = None

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError('"rental" videos not supported')

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            raise ExtractorError('Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

        # uploader_id
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
        if mobj is not None:
            video_uploader_id = mobj.group(1)
        else:
            self._downloader.report_warning('unable to extract uploader nickname')

        # title
        if 'title' in video_info:
            video_title = video_info['title'][0]
        else:
            self._downloader.report_warning('Unable to extract video title')
            video_title = '_'

        # thumbnail image
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning('unable to extract video thumbnail')
            video_thumbnail = None
        else:  # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

        # upload date
        upload_date = None
        mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
        if mobj is None:
            mobj = re.search(
                r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
                video_webpage)
        if mobj is not None:
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            upload_date = unified_strdate(upload_date)

        m_cat_container = self._search_regex(
            r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
            video_webpage, 'categories', default=None)
        if m_cat_container:
            category = self._html_search_regex(
                r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
                default=None)
            video_categories = None if category is None else [category]
        else:
            video_categories = None

        # description
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            video_description = re.sub(r'''(?x)
                <a\s+
                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
                    title="([^"]+)"\s+
                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
                    class="yt-uix-redirect-link"\s*>
                [^<]+
                </a>
            ''', r'\1', video_description)
            video_description = clean_html(video_description)
        else:
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
            if fd_mobj:
                video_description = unescapeHTML(fd_mobj.group(1))
            else:
                video_description = ''

        def _extract_count(count_name):
            count = self._search_regex(
                r'id="watch-%s"[^>]*>.*?([\d,]+)\s*</span>' % re.escape(count_name),
                video_webpage, count_name, default=None)
            if count is not None:
                return int(count.replace(',', ''))
            return None
        like_count = _extract_count('like')
        dislike_count = _extract_count('dislike')

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, video_webpage)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, video_webpage)
            return

        if 'length_seconds' not in video_info:
            self._downloader.report_warning('unable to extract video duration')
            video_duration = None
        else:
            video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))

        # annotations
        video_annotations = None
        if self._downloader.params.get('writeannotations', False):
            video_annotations = self._extract_annotations(video_id)

        def _map_to_format_list(urlmap):
            formats = []
            for itag, video_real_url in urlmap.items():
                dct = {
                    'format_id': itag,
                    'url': video_real_url,
                    'player_url': player_url,
                }
                if itag in self._formats:
                    dct.update(self._formats[itag])
                formats.append(dct)
            return formats

        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            formats = [{
                'format_id': '_rtmp',
                'protocol': 'rtmp',
                'url': video_info['conn'][0],
                'player_url': player_url,
            }]
        elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
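            # Each comma-separated entry is itself a URL-encoded query string,
            # e.g. (illustrative) 'itag=22&url=https%3A%2F%2F...&sig=...',
            # which compat_parse_qs unpacks below.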
            if 'rtmpe%3Dyes' in encoded_url_map:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            url_map = {}
            for url_data_str in encoded_url_map.split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' not in url_data or 'url' not in url_data:
                    continue
                format_id = url_data['itag'][0]
                url = url_data['url'][0]

                if 'sig' in url_data:
                    url += '&signature=' + url_data['sig'][0]
                elif 's' in url_data:
                    encrypted_sig = url_data['s'][0]

                    jsplayer_url_json = self._search_regex(
                        r'"assets":.+?"js":\s*("[^"]+")',
                        embed_webpage if age_gate else video_webpage, 'JS player URL')
                    player_url = json.loads(jsplayer_url_json)
                    if player_url is None:
                        player_url_json = self._search_regex(
                            r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
                            video_webpage, 'age gate player URL')
                        player_url = json.loads(player_url_json)

                    if self._downloader.params.get('verbose'):
                        if player_url is None:
                            player_version = 'unknown'
                            player_desc = 'unknown'
                        else:
                            if player_url.endswith('swf'):
                                player_version = self._search_regex(
                                    r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
                                    'flash player', fatal=False)
                                player_desc = 'flash player %s' % player_version
                            else:
                                player_version = self._search_regex(
                                    r'html5player-([^/]+?)(?:/html5player)?\.js',
                                    player_url,
                                    'html5 player', fatal=False)
                                player_desc = 'html5 player %s' % player_version

                        parts_sizes = self._signature_cache_id(encrypted_sig)
                        self.to_screen('{%s} signature length %s, %s' %
                                       (format_id, parts_sizes, player_desc))

                    signature = self._decrypt_signature(
                        encrypted_sig, video_id, player_url, age_gate)
                    url += '&signature=' + signature
                if 'ratebypass' not in url:
                    url += '&ratebypass=yes'
                url_map[format_id] = url
            formats = _map_to_format_list(url_map)
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            formats = _map_to_format_list(url_map)
        else:
            raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

        # Look for the DASH manifest
        if self._downloader.params.get('youtube_include_dash_manifest', True):
            dash_mpd = video_info.get('dashmpd')
            if dash_mpd:
                dash_manifest_url = dash_mpd[0]
                try:
                    dash_formats = self._parse_dash_manifest(
                        video_id, dash_manifest_url, player_url, age_gate)
                except (ExtractorError, KeyError) as e:
                    self.report_warning(
                        'Skipping DASH manifest: %r' % e, video_id)
                else:
                    # Hide the formats we found through non-DASH
                    dash_keys = set(df['format_id'] for df in dash_formats)
                    for f in formats:
                        if f['format_id'] in dash_keys:
                            f['format_id'] = 'nondash-%s' % f['format_id']
                            f['preference'] = f.get('preference', 0) - 10000
                    formats.extend(dash_formats)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'uploader': video_uploader,
            'uploader_id': video_uploader_id,
            'upload_date': upload_date,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'categories': video_categories,
            'subtitles': video_subtitles,
            'duration': video_duration,
            'age_limit': 18 if age_gate else 0,
            'annotations': video_annotations,
            'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'formats': formats,
        }


class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 also appears twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
        },
        'playlist_count': 2,
    }, {
        'note': 'embedded',
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
        },
        'playlist_mincount': 21,
    }]

    def _real_initialize(self):
        self._login()

    def _ids_to_results(self, ids):
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _extract_mix(self, playlist_id):
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
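        # (illustrative: a mix id is 'RD' followed by an 11-character video id,
        # so playlist_id[-11:] recovers the seed video id)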
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)
        ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
                href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
            webpage))
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)
        more_widget_html = content_html = page

        # Check if the playlist exists or is private
        if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None:
            raise ExtractorError(
                'The playlist doesn\'t exist or is private, use --username or '
                '--netrc to access it.',
                expected=True)

        # Extract the video ids from the playlist pages
        ids = []

        for page_num in itertools.count(1):
            matches = re.finditer(self._VIDEO_RE, content_html)
            # We remove the duplicates and the link with index 0
            # (it's not the first video of the playlist)
            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            if not content_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                break
            more_widget_html = more['load_more_widget_html']

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
            page, 'title')

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)


class YoutubeChannelIE(InfoExtractor):
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
    }]

    def extract_videos_from_page(self, page):
        ids_in_page = []
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))
        return ids_in_page

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        video_ids = []
        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(url, channel_id)
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            video_ids = self.extract_videos_from_page(channel_page)
            entries = [
                self.url_result(video_id, 'Youtube', video_id=video_id)
                for video_id in video_ids]
            return self.playlist_result(entries, channel_id)

        def _entries():
            more_widget_html = content_html = channel_page
            for pagenum in itertools.count(1):

                ids_in_page = self.extract_videos_from_page(content_html)
                for video_id in ids_in_page:
                    yield self.url_result(
                        video_id, 'Youtube', video_id=video_id)

                mobj = re.search(
                    r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
                    more_widget_html)
                if not mobj:
                    break

                more = self._download_json(
                    'https://youtube.com/%s' % mobj.group('more'), channel_id,
                    'Downloading page #%s' % (pagenum + 1),
                    transform_source=uppercase_escape)
                content_html = more['content_html']
                more_widget_html = more['load_more_widget_html']

        return self.playlist_result(_entries(), channel_id)


class YoutubeUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractors; the regex is too permissive and it would match.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        else:
            return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        username = self._match_id(url)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        def download_page(pagenum):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1
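            # Page 0 requests entries 1-50, page 1 entries 51-100, and so on,
            # given the default _GDATA_PAGE_SIZE of 50.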

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(
                gdata_url, username,
                'Downloading video ids from %d to %d' % (
                    start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                return

            # Extract video identifiers
            entries = response['feed']['entry']
            for entry in entries:
                title = entry['title']['$t']
                video_id = entry['id']['$t'].split('/')[-1]
                yield {
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    'id': video_id,
                    'title': title,
                }
        url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE)

        return self.playlist_result(url_results, playlist_title=username)


class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        pagenum = 0
        limit = n
        PAGE_SIZE = 50

        while (PAGE_SIZE * pagenum) < limit:
            result_url = self._API_URL % (
                compat_urllib_parse.quote_plus(query.encode('utf-8')),
                (PAGE_SIZE * pagenum) + 1)
            data_json = self._download_webpage(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % (pagenum + 1),
                errnote='Unable to download API page')
            data = json.loads(data_json)
            api_response = data['data']

            if 'items' not in api_response:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
                  for video_id in video_ids]
        return self.playlist_result(videos, query)


class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'


class YoutubeSearchURLIE(InfoExtractor):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse.unquote_plus(mobj.group('query'))

        webpage = self._download_webpage(url, query)
        result_code = self._search_regex(
            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')

        part_codes = re.findall(
            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
        entries = []
        for part_code in part_codes:
            part_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
            part_url_snippet = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            part_url = compat_urlparse.urljoin(
                'https://www.youtube.com/', part_url_snippet)
            entries.append({
                '_type': 'url',
                'url': part_url,
                'title': part_title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }


1482class YoutubeShowIE(InfoExtractor):
1483 IE_DESC = 'YouTube.com (multi-season) shows'
1484 _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
1485 IE_NAME = 'youtube:show'
1486 _TESTS = [{
1487 'url': 'http://www.youtube.com/show/airdisasters',
1488 'playlist_mincount': 3,
1489 'info_dict': {
1490 'id': 'airdisasters',
1491 'title': 'Air Disasters',
1492 }
1493 }]
1494
1495 def _real_extract(self, url):
1496 mobj = re.match(self._VALID_URL, url)
1497 playlist_id = mobj.group('id')
1498 webpage = self._download_webpage(
1499 url, playlist_id, 'Downloading show webpage')
1500 # There's one playlist for each season of the show
1501 m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
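# Season links are assumed to look like (illustrative value):
#   <a href="/playlist?list=PL0123456789ABCDEF">Season 1</a>
# i.e. one playlist link per season of the show.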
1502 self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
1503 entries = [
1504 self.url_result(
1505 'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
1506 for season in m_seasons
1507 ]
1508 title = self._og_search_title(webpage, fatal=False)
1509
1510 return {
1511 '_type': 'playlist',
1512 'id': playlist_id,
1513 'title': title,
1514 'entries': entries,
1515 }
1516
1517
1518class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
1519 """
1520 Base class for extractors that fetch info from
1521 http://www.youtube.com/feed_ajax
1522 Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
1523 """
1524 _LOGIN_REQUIRED = True
1525 # If True, request the feed with action_load_personal_feed instead of action_load_system_feed
1526 _PERSONAL_FEED = False
1527
1528 @property
1529 def _FEED_TEMPLATE(self):
1530 action = 'action_load_system_feed'
1531 if self._PERSONAL_FEED:
1532 action = 'action_load_personal_feed'
1533 return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)
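# For example (illustrative), with _FEED_NAME = 'recommended' the system-feed
# template expands to:
#   https://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=recommended&paging=%s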
1534
1535 @property
1536 def IE_NAME(self):
1537 return 'youtube:%s' % self._FEED_NAME
1538
1539 def _real_initialize(self):
1540 self._login()
1541
1542 def _real_extract(self, url):
1543 feed_entries = []
1544 paging = 0
1545 for i in itertools.count(1):
1546 info = self._download_json(
1547 self._FEED_TEMPLATE % paging,
1548 '%s feed' % self._FEED_NAME,
1549 'Downloading page %s' % i,
1550 transform_source=uppercase_escape)
1551 feed_html = info.get('feed_html') or info.get('content_html')
1552 load_more_widget_html = info.get('load_more_widget_html') or feed_html
1553 m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
1554 ids = orderedSet(m.group(1) for m in m_ids)
1555 feed_entries.extend(
1556 self.url_result(video_id, 'Youtube', video_id=video_id)
1557 for video_id in ids)
1558 mobj = re.search(
1559 r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
1560 load_more_widget_html)
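# The "load more" button is assumed to carry an attribute such as
# (hypothetical value): data-uix-load-more-href="/feed_ajax?...&paging=20"
# Its absence means the feed has been exhausted and paging stops.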
1561 if mobj is None:
1562 break
1563 paging = mobj.group('paging')
1564 return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1565
1566
1567class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
1568 IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
1569 _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1570 _FEED_NAME = 'recommended'
1571 _PLAYLIST_TITLE = 'Youtube Recommended videos'
1572
1573
1574class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
1575 IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
1576 _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
1577 _FEED_NAME = 'watch_later'
1578 _PLAYLIST_TITLE = 'Youtube Watch Later'
1579 _PERSONAL_FEED = True
1580
1581
1582class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
1583 IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
1584 _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
1585 _FEED_NAME = 'history'
1586 _PERSONAL_FEED = True
1587 _PLAYLIST_TITLE = 'Youtube Watch History'
1588
1589
1590class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
1591 IE_NAME = 'youtube:favorites'
1592 IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
1593 _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
1594 _LOGIN_REQUIRED = True
1595
1596 def _real_extract(self, url):
1597 webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
1598 playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
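# The favourites page is assumed to embed the playlist ID in a link such as
# (illustrative): href="/playlist?list=FL0123456789A&..." which the regex
# above captures up to the closing quote or the next parameter.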
1599 return self.url_result(playlist_id, 'YoutubePlaylist')
1600
1601
1602class YoutubeSubscriptionsIE(YoutubePlaylistIE):
1603 IE_NAME = 'youtube:subscriptions'
1604 IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
1605 _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1606 _TESTS = []
1607
1608 def _real_extract(self, url):
1609 title = 'Youtube Subscriptions'
1610 page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title)
1611
1612 # The extraction process is the same as for playlists, but the regex
1613 # for the video IDs doesn't contain an index
1614 ids = []
1615 more_widget_html = content_html = page
1616
1617 for page_num in itertools.count(1):
1618 matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
1619 new_ids = orderedSet(matches)
1620 ids.extend(new_ids)
1621
1622 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
1623 if not mobj:
1624 break
1625
1626 more = self._download_json(
1627 'https://youtube.com/%s' % mobj.group('more'), title,
1628 'Downloading page #%s' % page_num,
1629 transform_source=uppercase_escape)
1630 content_html = more['content_html']
1631 more_widget_html = more['load_more_widget_html']
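# Hedged sketch of the continuation JSON consumed above (abridged):
#   {'content_html': '... <a href="/watch?v=BaW_jenozKc"> ...',
#    'load_more_widget_html': '... data-uix-load-more-href="..." ...'}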
1632
1633 return {
1634 '_type': 'playlist',
1635 'title': title,
1636 'entries': self._ids_to_results(ids),
1637 }
1638
1639
1640class YoutubeTruncatedURLIE(InfoExtractor):
1641 IE_NAME = 'youtube:truncated_url'
1642 IE_DESC = False # Do not list
1643 _VALID_URL = r'''(?x)
1644 (?:https?://)?[^/]+/watch\?(?:
1645 feature=[a-z_]+|
1646 annotation_id=annotation_[^&]+
1647 )?$|
1648 (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
1649 '''
1650
1651 _TESTS = [{
1652 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
1653 'only_matching': True,
1654 }, {
1655 'url': 'http://www.youtube.com/watch?',
1656 'only_matching': True,
1657 }]
1658
1659 def _real_extract(self, url):
1660 raise ExtractorError(
1661 'Did you forget to quote the URL? Remember that & is a meta '
1662 'character in most shells, so you want to put the URL in quotes, '
1663 'like youtube-dl '
1664 '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
1665 ' or simply youtube-dl BaW_jenozKc .',
1666 expected=True)
1667
1668
1669class YoutubeTruncatedIDIE(InfoExtractor):
1670 IE_NAME = 'youtube:truncated_id'
1671 IE_DESC = False # Do not list
1672 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
1673
1674 _TESTS = [{
1675 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
1676 'only_matching': True,
1677 }]
1678
1679 def _real_extract(self, url):
1680 video_id = self._match_id(url)
1681 raise ExtractorError(
1682 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
1683 expected=True)