jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5
	6	import itertools
	7	import json
	8	import os.path
	9	import re
	10	import traceback
	11
	12	from .common import InfoExtractor, SearchInfoExtractor
	13	from .subtitles import SubtitlesInfoExtractor
	14	from ..jsinterp import JSInterpreter
	15	from ..swfinterp import SWFInterpreter
	16	from ..utils import (
	17	compat_chr,
	18	compat_parse_qs,
	19	compat_urllib_parse,
	20	compat_urllib_request,
	21	compat_urlparse,
	22	compat_str,
	23
	24	clean_html,
	25	get_element_by_id,
	26	get_element_by_attribute,
	27	ExtractorError,
	28	int_or_none,
	29	OnDemandPagedList,
	30	unescapeHTML,
	31	unified_strdate,
	32	orderedSet,
	33	uppercase_escape,
	34	)
	35
	36	class YoutubeBaseInfoExtractor(InfoExtractor):
	37	"""Provide base functions for Youtube extractors"""
	38	_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
	39	_TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
	40	_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	41	_AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	42	_NETRC_MACHINE = 'youtube'
	43	# If True it will raise an error if no login info is provided
	44	_LOGIN_REQUIRED = False
	45
	46	def _set_language(self):
	47	return bool(self._download_webpage(
	48	self._LANG_URL, None,
	49	note='Setting language', errnote='unable to set language',
	50	fatal=False))
	51
	52	def _login(self):
	53	"""
	54	Attempt to log in to YouTube.
	55	True is returned if successful or skipped.
	56	False is returned if login failed.
	57
	58	If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
	59	"""
	60	(username, password) = self._get_login_info()
	61	# No authentication to be performed
	62	if username is None:
	63	if self._LOGIN_REQUIRED:
	64	raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
	65	return True
	66
	67	login_page = self._download_webpage(
	68	self._LOGIN_URL, None,
	69	note='Downloading login page',
	70	errnote='unable to fetch login page', fatal=False)
	71	if login_page is False:
	72	return
	73
	74	galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
	75	login_page, 'Login GALX parameter')
	76
	77	# Log in
	78	login_form_strs = {
	79	'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
	80	'Email': username,
	81	'GALX': galx,
	82	'Passwd': password,
	83
	84	'PersistentCookie': 'yes',
	85	'_utf8': '霱',
	86	'bgresponse': 'js_disabled',
	87	'checkConnection': '',
	88	'checkedDomains': 'youtube',
	89	'dnConn': '',
	90	'pstMsg': '0',
	91	'rmShown': '1',
	92	'secTok': '',
	93	'signIn': 'Sign in',
	94	'timeStmp': '',
	95	'service': 'youtube',
	96	'uilel': '3',
	97	'hl': 'en_US',
	98	}
	99
	100	# Convert to UTF-8 before urlencode because Python 2.x's urlencode
	101	# chokes on unicode
	102	login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
	103	login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
	104
	105	req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
	106	login_results = self._download_webpage(
	107	req, None,
	108	note='Logging in', errnote='unable to log in', fatal=False)
	109	if login_results is False:
	110	return False
	111
	112	if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
	113	raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
	114
	115	# Two-Factor
	116	# TODO add SMS and phone call support - these require making a request and then prompting the user
	117
	118	if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
	119	tfa_code = self._get_tfa_info()
	120
	121	if tfa_code is None:
	122	self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
	123	self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
	124	return False
	125
	126	# Unlike the first login form, secTok and timeStmp are both required for the TFA form
	127
	128	match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M \| re.U)
	129	if match is None:
	130	self._downloader.report_warning('Failed to get secTok - did the page structure change?')
	131	secTok = match.group(1)
	132	match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M \| re.U)
	133	if match is None:
	134	self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
	135	timeStmp = match.group(1)
	136
	137	tfa_form_strs = {
	138	'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
	139	'smsToken': '',
	140	'smsUserPin': tfa_code,
	141	'smsVerifyPin': 'Verify',
	142
	143	'PersistentCookie': 'yes',
	144	'checkConnection': '',
	145	'checkedDomains': 'youtube',
	146	'pstMsg': '1',
	147	'secTok': secTok,
	148	'timeStmp': timeStmp,
	149	'service': 'youtube',
	150	'hl': 'en_US',
	151	}
	152	tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in tfa_form_strs.items())
	153	tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
	154
	155	tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
	156	tfa_results = self._download_webpage(
	157	tfa_req, None,
	158	note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
	159
	160	if tfa_results is False:
	161	return False
	162
	163	if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
	164	self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
	165	return False
	166	if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
	167	self._downloader.report_warning('unable to log in - did the page structure change?')
	168	return False
	169	if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
	170	self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
	171	return False
	172
	173	if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
	174	self._downloader.report_warning('unable to log in: bad username or password')
	175	return False
	176	return True
	177
	178	def _confirm_age(self):
	179	age_form = {
	180	'next_url': '/',
	181	'action_confirm': 'Confirm',
	182	}
	183	req = compat_urllib_request.Request(self._AGE_URL,
	184	compat_urllib_parse.urlencode(age_form).encode('ascii'))
	185
	186	self._download_webpage(
	187	req, None,
	188	note='Confirming age', errnote='Unable to confirm age',
	189	fatal=False)
	190
	191	def _real_initialize(self):
	192	if self._downloader is None:
	193	return
	194	if self._get_login_info()[0] is not None:
	195	if not self._set_language():
	196	return
	197	if not self._login():
	198	return
	199	self._confirm_age()
	200
	201
	202	class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
	203	IE_DESC = 'YouTube.com'
	204	_VALID_URL = r"""(?x)^
	205	(
	206	(?:https?://\|//) # http(s):// or protocol-independent URL
	207	(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/\|
	208	(?:www\.)?deturl\.com/www\.youtube\.com/\|
	209	(?:www\.)?pwnyoutube\.com/\|
	210	(?:www\.)?yourepeat\.com/\|
	211	tube\.majestyc\.net/\|
	212	youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
	213	(?:.*?\#/)? # handle anchor (#/) redirect urls
	214	(?: # the various things that can precede the ID:
	215	(?:(?:v\|embed\|e)/(?!videoseries)) # v/ or embed/ or e/
	216	\|(?: # or the v= param in all its forms
	217	(?:(?:watch\|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup\|.php) or nothing (like /?v=xxxx)
	218	(?:\?\|\#!?) # the params delimiter ? or # or #!
	219	(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
	220	v=
	221	)
	222	))
	223	\|youtu\.be/ # just youtu.be/xxxx
	224	\|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
	225	)
	226	)? # all until now is optional -> you can pass the naked ID
	227	([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
	228	(?!.*?&list=) # combined list/video URLs are handled by the playlist IE
	229	(?(1).+)? # if we found the ID, everything can follow
	230	$"""
	231	_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
	232	_formats = {
	233	'5': {'ext': 'flv', 'width': 400, 'height': 240},
	234	'6': {'ext': 'flv', 'width': 450, 'height': 270},
	235	'13': {'ext': '3gp'},
	236	'17': {'ext': '3gp', 'width': 176, 'height': 144},
	237	'18': {'ext': 'mp4', 'width': 640, 'height': 360},
	238	'22': {'ext': 'mp4', 'width': 1280, 'height': 720},
	239	'34': {'ext': 'flv', 'width': 640, 'height': 360},
	240	'35': {'ext': 'flv', 'width': 854, 'height': 480},
	241	'36': {'ext': '3gp', 'width': 320, 'height': 240},
	242	'37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
	243	'38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
	244	'43': {'ext': 'webm', 'width': 640, 'height': 360},
	245	'44': {'ext': 'webm', 'width': 854, 'height': 480},
	246	'45': {'ext': 'webm', 'width': 1280, 'height': 720},
	247	'46': {'ext': 'webm', 'width': 1920, 'height': 1080},
	248
	249
	250	# 3d videos
	251	'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
	252	'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
	253	'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
	254	'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
	255	'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
	256	'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
	257	'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
	258
	259	# Apple HTTP Live Streaming
	260	'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
	261	'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
	262	'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
	263	'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
	264	'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
	265	'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
	266	'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
	267
	268	# DASH mp4 video
	269	'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	270	'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	271	'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	272	'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	273	'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	274	'138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	275	'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	276	'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	277	'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
	278	'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
	279	'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
	280
	281	# Dash mp4 audio
	282	'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
	283	'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
	284	'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
	285
	286	# Dash webm
	287	'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	288	'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	289	'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	290	'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	291	'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	292	'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	293	'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'VP9'},
	294	'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	295	'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	296	'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	297	'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	298	'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	299	'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	300	'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	301	'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	302	'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	303	'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
	304	'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
	305
	306	# Dash webm audio
	307	'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
	308	'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
	309
	310	# Dash webm audio with opus inside
	311	'249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
	312	'250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
	313	'251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
	314
	315	# RTMP (unnamed)
	316	'_rtmp': {'protocol': 'rtmp'},
	317	}
	318
	319	IE_NAME = 'youtube'
	320	_TESTS = [
	321	{
	322	'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
	323	'info_dict': {
	324	'id': 'BaW_jenozKc',
	325	'ext': 'mp4',
	326	'title': 'youtube-dl test video "\'/\\ä↭𝕐',
	327	'uploader': 'Philipp Hagemeister',
	328	'uploader_id': 'phihag',
	329	'upload_date': '20121002',
	330	'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
	331	'categories': ['Science & Technology'],
	332	'like_count': int,
	333	'dislike_count': int,
	334	}
	335	},
	336	{
	337	'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
	338	'note': 'Test generic use_cipher_signature video (#897)',
	339	'info_dict': {
	340	'id': 'UxxajLWwzqY',
	341	'ext': 'mp4',
	342	'upload_date': '20120506',
	343	'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
	344	'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
	345	'uploader': 'Icona Pop',
	346	'uploader_id': 'IconaPop',
	347	}
	348	},
	349	{
	350	'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
	351	'note': 'Test VEVO video with age protection (#956)',
	352	'info_dict': {
	353	'id': '07FYdnEawAQ',
	354	'ext': 'mp4',
	355	'upload_date': '20130703',
	356	'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
	357	'description': 'md5:64249768eec3bc4276236606ea996373',
	358	'uploader': 'justintimberlakeVEVO',
	359	'uploader_id': 'justintimberlakeVEVO',
	360	}
	361	},
	362	{
	363	'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
	364	'note': 'Embed-only video (#1746)',
	365	'info_dict': {
	366	'id': 'yZIXLfi8CZQ',
	367	'ext': 'mp4',
	368	'upload_date': '20120608',
	369	'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
	370	'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
	371	'uploader': 'SET India',
	372	'uploader_id': 'setindia'
	373	}
	374	},
	375	{
	376	'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
	377	'note': '256k DASH audio (format 141) via DASH manifest',
	378	'info_dict': {
	379	'id': 'a9LDPn-MO4I',
	380	'ext': 'm4a',
	381	'upload_date': '20121002',
	382	'uploader_id': '8KVIDEO',
	383	'description': '',
	384	'uploader': '8KVIDEO',
	385	'title': 'UHDTV TEST 8K VIDEO.mp4'
	386	},
	387	'params': {
	388	'youtube_include_dash_manifest': True,
	389	'format': '141',
	390	},
	391	},
	392	# DASH manifest with encrypted signature
	393	{
	394	'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
	395	'info_dict': {
	396	'id': 'IB3lcPjvWLA',
	397	'ext': 'm4a',
	398	'title': 'Afrojack - The Spark ft. Spree Wilson',
	399	'description': 'md5:9717375db5a9a3992be4668bbf3bc0a8',
	400	'uploader': 'AfrojackVEVO',
	401	'uploader_id': 'AfrojackVEVO',
	402	'upload_date': '20131011',
	403	},
	404	'params': {
	405	'youtube_include_dash_manifest': True,
	406	'format': '141',
	407	},
	408	},
	409	]
	410
	411	def __init__(self, args, *kwargs):
	412	super(YoutubeIE, self).__init__(args, *kwargs)
	413	self._player_cache = {}
	414
	415	def report_video_info_webpage_download(self, video_id):
	416	"""Report attempt to download video info webpage."""
	417	self.to_screen('%s: Downloading video info webpage' % video_id)
	418
	419	def report_information_extraction(self, video_id):
	420	"""Report attempt to extract video information."""
	421	self.to_screen('%s: Extracting video information' % video_id)
	422
	423	def report_unavailable_format(self, video_id, format):
	424	"""Report extracted video URL."""
	425	self.to_screen('%s: Format %s not available' % (video_id, format))
	426
	427	def report_rtmp_download(self):
	428	"""Indicate the download will use the RTMP protocol."""
	429	self.to_screen('RTMP download detected')
	430
	431	def _signature_cache_id(self, example_sig):
	432	""" Return a string representation of a signature """
	433	return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
	434
	435	def _extract_signature_function(self, video_id, player_url, example_sig):
	436	id_m = re.match(
	437	r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3\|/html5player)?\.(?P<ext>[a-z]+)$',
	438	player_url)
	439	if not id_m:
	440	raise ExtractorError('Cannot identify player %r' % player_url)
	441	player_type = id_m.group('ext')
	442	player_id = id_m.group('id')
	443
	444	# Read from filesystem cache
	445	func_id = '%s_%s_%s' % (
	446	player_type, player_id, self._signature_cache_id(example_sig))
	447	assert os.path.basename(func_id) == func_id
	448
	449	cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
	450	if cache_spec is not None:
	451	return lambda s: ''.join(s[i] for i in cache_spec)
	452
	453	if player_type == 'js':
	454	code = self._download_webpage(
	455	player_url, video_id,
	456	note='Downloading %s player %s' % (player_type, player_id),
	457	errnote='Download of %s failed' % player_url)
	458	res = self._parse_sig_js(code)
	459	elif player_type == 'swf':
	460	urlh = self._request_webpage(
	461	player_url, video_id,
	462	note='Downloading %s player %s' % (player_type, player_id),
	463	errnote='Download of %s failed' % player_url)
	464	code = urlh.read()
	465	res = self._parse_sig_swf(code)
	466	else:
	467	assert False, 'Invalid player type %r' % player_type
	468
	469	if cache_spec is None:
	470	test_string = ''.join(map(compat_chr, range(len(example_sig))))
	471	cache_res = res(test_string)
	472	cache_spec = [ord(c) for c in cache_res]
	473
	474	self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
	475	return res
	476
	477	def _print_sig_code(self, func, example_sig):
	478	def gen_sig_code(idxs):
	479	def _genslice(start, end, step):
	480	starts = '' if start == 0 else str(start)
	481	ends = (':%d' % (end+step)) if end + step >= 0 else ':'
	482	steps = '' if step == 1 else (':%d' % step)
	483	return 's[%s%s%s]' % (starts, ends, steps)
	484
	485	step = None
	486	start = '(Never used)' # Quelch pyflakes warnings - start will be
	487	# set as soon as step is set
	488	for i, prev in zip(idxs[1:], idxs[:-1]):
	489	if step is not None:
	490	if i - prev == step:
	491	continue
	492	yield _genslice(start, prev, step)
	493	step = None
	494	continue
	495	if i - prev in [-1, 1]:
	496	step = i - prev
	497	start = prev
	498	continue
	499	else:
	500	yield 's[%d]' % prev
	501	if step is None:
	502	yield 's[%d]' % i
	503	else:
	504	yield _genslice(start, i, step)
	505
	506	test_string = ''.join(map(compat_chr, range(len(example_sig))))
	507	cache_res = func(test_string)
	508	cache_spec = [ord(c) for c in cache_res]
	509	expr_code = ' + '.join(gen_sig_code(cache_spec))
	510	signature_id_tuple = '(%s)' % (
	511	', '.join(compat_str(len(p)) for p in example_sig.split('.')))
	512	code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
	513	' return %s\n') % (signature_id_tuple, expr_code)
	514	self.to_screen('Extracted signature function:\n' + code)
	515
	516	def _parse_sig_js(self, jscode):
	517	funcname = self._search_regex(
	518	r'\.sig\\|\\|([a-zA-Z0-9]+)\(', jscode,
	519	'Initial JS player signature function name')
	520
	521	jsi = JSInterpreter(jscode)
	522	initial_function = jsi.extract_function(funcname)
	523	return lambda s: initial_function([s])
	524
	525	def _parse_sig_swf(self, file_contents):
	526	swfi = SWFInterpreter(file_contents)
	527	TARGET_CLASSNAME = 'SignatureDecipher'
	528	searched_class = swfi.extract_class(TARGET_CLASSNAME)
	529	initial_function = swfi.extract_function(searched_class, 'decipher')
	530	return lambda s: initial_function([s])
	531
	532	def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
	533	"""Turn the encrypted s field into a working signature"""
	534
	535	if player_url is None:
	536	raise ExtractorError('Cannot decrypt signature without player_url')
	537
	538	if player_url.startswith('//'):
	539	player_url = 'https:' + player_url
	540	try:
	541	player_id = (player_url, self._signature_cache_id(s))
	542	if player_id not in self._player_cache:
	543	func = self._extract_signature_function(
	544	video_id, player_url, s
	545	)
	546	self._player_cache[player_id] = func
	547	func = self._player_cache[player_id]
	548	if self._downloader.params.get('youtube_print_sig_code'):
	549	self._print_sig_code(func, s)
	550	return func(s)
	551	except Exception as e:
	552	tb = traceback.format_exc()
	553	raise ExtractorError(
	554	'Signature extraction failed: ' + tb, cause=e)
	555
	556	def _get_available_subtitles(self, video_id, webpage):
	557	try:
	558	sub_list = self._download_webpage(
	559	'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
	560	video_id, note=False)
	561	except ExtractorError as err:
	562	self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
	563	return {}
	564	lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
	565
	566	sub_lang_list = {}
	567	for l in lang_list:
	568	lang = l[1]
	569	if lang in sub_lang_list:
	570	continue
	571	params = compat_urllib_parse.urlencode({
	572	'lang': lang,
	573	'v': video_id,
	574	'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
	575	'name': unescapeHTML(l[0]).encode('utf-8'),
	576	})
	577	url = 'https://www.youtube.com/api/timedtext?' + params
	578	sub_lang_list[lang] = url
	579	if not sub_lang_list:
	580	self._downloader.report_warning('video doesn\'t have subtitles')
	581	return {}
	582	return sub_lang_list
	583
	584	def _get_available_automatic_caption(self, video_id, webpage):
	585	"""We need the webpage for getting the captions url, pass it as an
	586	argument to speed up the process."""
	587	sub_format = self._downloader.params.get('subtitlesformat', 'srt')
	588	self.to_screen('%s: Looking for automatic captions' % video_id)
	589	mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
	590	err_msg = 'Couldn\'t find automatic captions for %s' % video_id
	591	if mobj is None:
	592	self._downloader.report_warning(err_msg)
	593	return {}
	594	player_config = json.loads(mobj.group(1))
	595	try:
	596	args = player_config[u'args']
	597	caption_url = args[u'ttsurl']
	598	timestamp = args[u'timestamp']
	599	# We get the available subtitles
	600	list_params = compat_urllib_parse.urlencode({
	601	'type': 'list',
	602	'tlangs': 1,
	603	'asrs': 1,
	604	})
	605	list_url = caption_url + '&' + list_params
	606	caption_list = self._download_xml(list_url, video_id)
	607	original_lang_node = caption_list.find('track')
	608	if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
	609	self._downloader.report_warning('Video doesn\'t have automatic captions')
	610	return {}
	611	original_lang = original_lang_node.attrib['lang_code']
	612
	613	sub_lang_list = {}
	614	for lang_node in caption_list.findall('target'):
	615	sub_lang = lang_node.attrib['lang_code']
	616	params = compat_urllib_parse.urlencode({
	617	'lang': original_lang,
	618	'tlang': sub_lang,
	619	'fmt': sub_format,
	620	'ts': timestamp,
	621	'kind': 'asr',
	622	})
	623	sub_lang_list[sub_lang] = caption_url + '&' + params
	624	return sub_lang_list
	625	# An extractor error can be raise by the download process if there are
	626	# no automatic captions but there are subtitles
	627	except (KeyError, ExtractorError):
	628	self._downloader.report_warning(err_msg)
	629	return {}
	630
	631	@classmethod
	632	def extract_id(cls, url):
	633	mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
	634	if mobj is None:
	635	raise ExtractorError('Invalid URL: %s' % url)
	636	video_id = mobj.group(2)
	637	return video_id
	638
	639	def _extract_from_m3u8(self, manifest_url, video_id):
	640	url_map = {}
	641	def _get_urls(_manifest):
	642	lines = _manifest.split('\n')
	643	urls = filter(lambda l: l and not l.startswith('#'),
	644	lines)
	645	return urls
	646	manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
	647	formats_urls = _get_urls(manifest)
	648	for format_url in formats_urls:
	649	itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
	650	url_map[itag] = format_url
	651	return url_map
	652
	653	def _extract_annotations(self, video_id):
	654	url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
	655	return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
	656
	657	def _real_extract(self, url):
	658	proto = (
	659	'http' if self._downloader.params.get('prefer_insecure', False)
	660	else 'https')
	661
	662	# Extract original video URL from URL with redirection, like age verification, using next_url parameter
	663	mobj = re.search(self._NEXT_URL_RE, url)
	664	if mobj:
	665	url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
	666	video_id = self.extract_id(url)
	667
	668	# Get video webpage
	669	url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
	670	pref_cookies = [
	671	c for c in self._downloader.cookiejar
	672	if c.domain == '.youtube.com' and c.name == 'PREF']
	673	for pc in pref_cookies:
	674	if 'hl=' in pc.value:
	675	pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value)
	676	else:
	677	if pc.value:
	678	pc.value += '&'
	679	pc.value += 'hl=en'
	680	video_webpage = self._download_webpage(url, video_id)
	681
	682	# Attempt to extract SWF player URL
	683	mobj = re.search(r'swfConfig.?"(https?:\\/\\/.?watch.?-.?\.swf)"', video_webpage)
	684	if mobj is not None:
	685	player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
	686	else:
	687	player_url = None
	688
	689	# Get video info
	690	self.report_video_info_webpage_download(video_id)
	691	if re.search(r'player-age-gate-content">', video_webpage) is not None:
	692	age_gate = True
	693	# We simulate the access to the video from www.youtube.com/v/{video_id}
	694	# this can be viewed without login into Youtube
	695	data = compat_urllib_parse.urlencode({
	696	'video_id': video_id,
	697	'eurl': 'https://youtube.googleapis.com/v/' + video_id,
	698	'sts': self._search_regex(
	699	r'"sts"\s:\s(\d+)', video_webpage, 'sts', default=''),
	700	})
	701	video_info_url = proto + '://www.youtube.com/get_video_info?' + data
	702	video_info_webpage = self._download_webpage(
	703	video_info_url, video_id,
	704	note='Refetching age-gated info webpage',
	705	errnote='unable to download video info webpage')
	706	video_info = compat_parse_qs(video_info_webpage)
	707	else:
	708	age_gate = False
	709	for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
	710	video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
	711	% (video_id, el_type))
	712	video_info_webpage = self._download_webpage(video_info_url, video_id,
	713	note=False,
	714	errnote='unable to download video info webpage')
	715	video_info = compat_parse_qs(video_info_webpage)
	716	if 'token' in video_info:
	717	break
	718	if 'token' not in video_info:
	719	if 'reason' in video_info:
	720	raise ExtractorError(
	721	'YouTube said: %s' % video_info['reason'][0],
	722	expected=True, video_id=video_id)
	723	else:
	724	raise ExtractorError(
	725	'"token" parameter not in video info for unknown reason',
	726	video_id=video_id)
	727
	728	if 'view_count' in video_info:
	729	view_count = int(video_info['view_count'][0])
	730	else:
	731	view_count = None
	732
	733	# Check for "rental" videos
	734	if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
	735	raise ExtractorError('"rental" videos not supported')
	736
	737	# Start extracting information
	738	self.report_information_extraction(video_id)
	739
	740	# uploader
	741	if 'author' not in video_info:
	742	raise ExtractorError('Unable to extract uploader name')
	743	video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
	744
	745	# uploader_id
	746	video_uploader_id = None
	747	mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user\|channel)/([^"]+)">', video_webpage)
	748	if mobj is not None:
	749	video_uploader_id = mobj.group(1)
	750	else:
	751	self._downloader.report_warning('unable to extract uploader nickname')
	752
	753	# title
	754	if 'title' in video_info:
	755	video_title = video_info['title'][0]
	756	else:
	757	self._downloader.report_warning('Unable to extract video title')
	758	video_title = '_'
	759
	760	# thumbnail image
	761	# We try first to get a high quality image:
	762	m_thumb = re.search(r'<span itemprop="thumbnail".?href="(.?)">',
	763	video_webpage, re.DOTALL)
	764	if m_thumb is not None:
	765	video_thumbnail = m_thumb.group(1)
	766	elif 'thumbnail_url' not in video_info:
	767	self._downloader.report_warning('unable to extract video thumbnail')
	768	video_thumbnail = None
	769	else: # don't panic if we can't find it
	770	video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
	771
	772	# upload date
	773	upload_date = None
	774	mobj = re.search(r'(?s)id="eow-date.?>(.?)</span>', video_webpage)
	775	if mobj is None:
	776	mobj = re.search(
	777	r'(?s)id="watch-uploader-info".?>.?(?:Published\|Uploaded\|Streamed live) on (.*?)</strong>',
	778	video_webpage)
	779	if mobj is not None:
	780	upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
	781	upload_date = unified_strdate(upload_date)
	782
	783	m_cat_container = self._search_regex(
	784	r'(?s)<h4[^>]>\sCategory\s</h4>\s<ul[^>]>(.?)</ul>',
	785	video_webpage, 'categories', fatal=False)
	786	if m_cat_container:
	787	category = self._html_search_regex(
	788	r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
	789	default=None)
	790	video_categories = None if category is None else [category]
	791	else:
	792	video_categories = None
	793
	794	# description
	795	video_description = get_element_by_id("eow-description", video_webpage)
	796	if video_description:
	797	video_description = re.sub(r'''(?x)
	798	<a\s+
	799	(?:[a-zA-Z-]+="[^"]+"\s+)*?
	800	title="([^"]+)"\s+
	801	(?:[a-zA-Z-]+="[^"]+"\s+)*?
	802	class="yt-uix-redirect-link"\s*>
	803	[^<]+
	804	</a>
	805	''', r'\1', video_description)
	806	video_description = clean_html(video_description)
	807	else:
	808	fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
	809	if fd_mobj:
	810	video_description = unescapeHTML(fd_mobj.group(1))
	811	else:
	812	video_description = ''
	813
	814	def _extract_count(count_name):
	815	count = self._search_regex(
	816	r'id="watch-%s"[^>]>.?([\d,]+)\s*</span>' % re.escape(count_name),
	817	video_webpage, count_name, default=None)
	818	if count is not None:
	819	return int(count.replace(',', ''))
	820	return None
	821	like_count = _extract_count('like')
	822	dislike_count = _extract_count('dislike')
	823
	824	# subtitles
	825	video_subtitles = self.extract_subtitles(video_id, video_webpage)
	826
	827	if self._downloader.params.get('listsubtitles', False):
	828	self._list_available_subtitles(video_id, video_webpage)
	829	return
	830
	831	if 'length_seconds' not in video_info:
	832	self._downloader.report_warning('unable to extract video duration')
	833	video_duration = None
	834	else:
	835	video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
	836
	837	# annotations
	838	video_annotations = None
	839	if self._downloader.params.get('writeannotations', False):
	840	video_annotations = self._extract_annotations(video_id)
	841
	842	# Decide which formats to download
	843	try:
	844	mobj = re.search(r';ytplayer\.config\s=\s({.*?});', video_webpage)
	845	if not mobj:
	846	raise ValueError('Could not find vevo ID')
	847	json_code = uppercase_escape(mobj.group(1))
	848	ytplayer_config = json.loads(json_code)
	849	args = ytplayer_config['args']
	850	# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
	851	# this signatures are encrypted
	852	if 'url_encoded_fmt_stream_map' not in args:
	853	raise ValueError('No stream_map present') # caught below
	854	re_signature = re.compile(r'[&,]s=')
	855	m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
	856	if m_s is not None:
	857	self.to_screen('%s: Encrypted signatures detected.' % video_id)
	858	video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
	859	m_s = re_signature.search(args.get('adaptive_fmts', ''))
	860	if m_s is not None:
	861	if 'adaptive_fmts' in video_info:
	862	video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
	863	else:
	864	video_info['adaptive_fmts'] = [args['adaptive_fmts']]
	865	except ValueError:
	866	pass
	867
	868	def _map_to_format_list(urlmap):
	869	formats = []
	870	for itag, video_real_url in urlmap.items():
	871	dct = {
	872	'format_id': itag,
	873	'url': video_real_url,
	874	'player_url': player_url,
	875	}
	876	if itag in self._formats:
	877	dct.update(self._formats[itag])
	878	formats.append(dct)
	879	return formats
	880
	881	if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
	882	self.report_rtmp_download()
	883	formats = [{
	884	'format_id': '_rtmp',
	885	'protocol': 'rtmp',
	886	'url': video_info['conn'][0],
	887	'player_url': player_url,
	888	}]
	889	elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
	890	encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
	891	if 'rtmpe%3Dyes' in encoded_url_map:
	892	raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
	893	url_map = {}
	894	for url_data_str in encoded_url_map.split(','):
	895	url_data = compat_parse_qs(url_data_str)
	896	if 'itag' not in url_data or 'url' not in url_data:
	897	continue
	898	format_id = url_data['itag'][0]
	899	url = url_data['url'][0]
	900
	901	if 'sig' in url_data:
	902	url += '&signature=' + url_data['sig'][0]
	903	elif 's' in url_data:
	904	encrypted_sig = url_data['s'][0]
	905
	906	if not age_gate:
	907	jsplayer_url_json = self._search_regex(
	908	r'"assets":.+?"js":\s*("[^"]+")',
	909	video_webpage, 'JS player URL')
	910	player_url = json.loads(jsplayer_url_json)
	911	if player_url is None:
	912	player_url_json = self._search_regex(
	913	r'ytplayer\.config.?"url"\s:\s*("[^"]+")',
	914	video_webpage, 'age gate player URL')
	915	player_url = json.loads(player_url_json)
	916
	917	if self._downloader.params.get('verbose'):
	918	if player_url is None:
	919	player_version = 'unknown'
	920	player_desc = 'unknown'
	921	else:
	922	if player_url.endswith('swf'):
	923	player_version = self._search_regex(
	924	r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
	925	'flash player', fatal=False)
	926	player_desc = 'flash player %s' % player_version
	927	else:
	928	player_version = self._search_regex(
	929	r'html5player-([^/]+?)(?:/html5player)?\.js',
	930	player_url,
	931	'html5 player', fatal=False)
	932	player_desc = 'html5 player %s' % player_version
	933
	934	parts_sizes = self._signature_cache_id(encrypted_sig)
	935	self.to_screen('{%s} signature length %s, %s' %
	936	(format_id, parts_sizes, player_desc))
	937
	938	signature = self._decrypt_signature(
	939	encrypted_sig, video_id, player_url, age_gate)
	940	url += '&signature=' + signature
	941	if 'ratebypass' not in url:
	942	url += '&ratebypass=yes'
	943	url_map[format_id] = url
	944	formats = _map_to_format_list(url_map)
	945	elif video_info.get('hlsvp'):
	946	manifest_url = video_info['hlsvp'][0]
	947	url_map = self._extract_from_m3u8(manifest_url, video_id)
	948	formats = _map_to_format_list(url_map)
	949	else:
	950	raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
	951
	952	# Look for the DASH manifest
	953	if self._downloader.params.get('youtube_include_dash_manifest', True):
	954	try:
	955	# The DASH manifest used needs to be the one from the original video_webpage.
	956	# The one found in get_video_info seems to be using different signatures.
	957	# However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
	958	# Luckily, it seems, this case uses some kind of default signature (len == 86), so the
	959	# combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
	960	if age_gate:
	961	dash_manifest_url = video_info.get('dashmpd')[0]
	962	else:
	963	dash_manifest_url = ytplayer_config['args']['dashmpd']
	964	def decrypt_sig(mobj):
	965	s = mobj.group(1)
	966	dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
	967	return '/signature/%s' % dec_s
	968	dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
	969	dash_doc = self._download_xml(
	970	dash_manifest_url, video_id,
	971	note='Downloading DASH manifest',
	972	errnote='Could not download DASH manifest')
	973	for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
	974	url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
	975	if url_el is None:
	976	continue
	977	format_id = r.attrib['id']
	978	video_url = url_el.text
	979	filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
	980	f = {
	981	'format_id': format_id,
	982	'url': video_url,
	983	'width': int_or_none(r.attrib.get('width')),
	984	'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
	985	'asr': int_or_none(r.attrib.get('audioSamplingRate')),
	986	'filesize': filesize,
	987	}
	988	try:
	989	existing_format = next(
	990	fo for fo in formats
	991	if fo['format_id'] == format_id)
	992	except StopIteration:
	993	f.update(self._formats.get(format_id, {}))
	994	formats.append(f)
	995	else:
	996	existing_format.update(f)
	997
	998	except (ExtractorError, KeyError) as e:
	999	self.report_warning('Skipping DASH manifest: %r' % e, video_id)
	1000
	1001	self._sort_formats(formats)
	1002
	1003	return {
	1004	'id': video_id,
	1005	'uploader': video_uploader,
	1006	'uploader_id': video_uploader_id,
	1007	'upload_date': upload_date,
	1008	'title': video_title,
	1009	'thumbnail': video_thumbnail,
	1010	'description': video_description,
	1011	'categories': video_categories,
	1012	'subtitles': video_subtitles,
	1013	'duration': video_duration,
	1014	'age_limit': 18 if age_gate else 0,
	1015	'annotations': video_annotations,
	1016	'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
	1017	'view_count': view_count,
	1018	'like_count': like_count,
	1019	'dislike_count': dislike_count,
	1020	'formats': formats,
	1021	}
	1022
	1023	class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
	1024	IE_DESC = 'YouTube.com playlists'
	1025	_VALID_URL = r"""(?x)(?:
	1026	(?:https?://)?
	1027	(?:\w+\.)?
	1028	youtube\.com/
	1029	(?:
	1030	(?:course\|view_play_list\|my_playlists\|artist\|playlist\|watch\|embed/videoseries)
	1031	\? (?:.?&)? (?:p\|a\|list)=
	1032	\| p/
	1033	)
	1034	(
	1035	(?:PL\|LL\|EC\|UU\|FL\|RD)?[0-9A-Za-z-_]{10,}
	1036	# Top tracks, they can also include dots
	1037	\|(?:MC)[\w\.]*
	1038	)
	1039	.*
	1040	\|
	1041	((?:PL\|LL\|EC\|UU\|FL\|RD)[0-9A-Za-z-_]{10,})
	1042	)"""
	1043	_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
	1044	_MORE_PAGES_INDICATOR = r'data-link-type="next"'
	1045	_VIDEO_RE = r'href="\s/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]?index=(?P<index>\d+)'
	1046	IE_NAME = 'youtube:playlist'
	1047	_TESTS = [{
	1048	'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
	1049	'info_dict': {
	1050	'title': 'ytdl test PL',
	1051	'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
	1052	},
	1053	'playlist_count': 3,
	1054	}, {
	1055	'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
	1056	'info_dict': {
	1057	'title': 'YDL_Empty_List',
	1058	},
	1059	'playlist_count': 0,
	1060	}, {
	1061	'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
	1062	'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
	1063	'info_dict': {
	1064	'title': '29C3: Not my department',
	1065	},
	1066	'playlist_count': 95,
	1067	}, {
	1068	'note': 'issue #673',
	1069	'url': 'PLBB231211A4F62143',
	1070	'info_dict': {
	1071	'title': '[OLD]Team Fortress 2 (Class-based LP)',
	1072	},
	1073	'playlist_mincount': 26,
	1074	}, {
	1075	'note': 'Large playlist',
	1076	'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
	1077	'info_dict': {
	1078	'title': 'Uploads from Cauchemar',
	1079	},
	1080	'playlist_mincount': 799,
	1081	}, {
	1082	'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
	1083	'info_dict': {
	1084	'title': 'YDL_safe_search',
	1085	},
	1086	'playlist_count': 2,
	1087	}, {
	1088	'note': 'embedded',
	1089	'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
	1090	'playlist_count': 4,
	1091	'info_dict': {
	1092	'title': 'JODA15',
	1093	}
	1094	}, {
	1095	'note': 'Embedded SWF player',
	1096	'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
	1097	'playlist_count': 4,
	1098	'info_dict': {
	1099	'title': 'JODA7',
	1100	}
	1101	}]
	1102
	1103	def _real_initialize(self):
	1104	self._login()
	1105
	1106	def _ids_to_results(self, ids):
	1107	return [
	1108	self.url_result(vid_id, 'Youtube', video_id=vid_id)
	1109	for vid_id in ids]
	1110
	1111	def _extract_mix(self, playlist_id):
	1112	# The mixes are generated from a a single video
	1113	# the id of the playlist is just 'RD' + video_id
	1114	url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
	1115	webpage = self._download_webpage(
	1116	url, playlist_id, 'Downloading Youtube mix')
	1117	search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
	1118	title_span = (
	1119	search_title('playlist-title') or
	1120	search_title('title long-title') or
	1121	search_title('title'))
	1122	title = clean_html(title_span)
	1123	ids = orderedSet(re.findall(
	1124	r'''(?xs)data-video-username=".?".?
	1125	href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
	1126	webpage))
	1127	url_results = self._ids_to_results(ids)
	1128
	1129	return self.playlist_result(url_results, playlist_id, title)
	1130
	1131	def _real_extract(self, url):
	1132	# Extract playlist id
	1133	mobj = re.match(self._VALID_URL, url)
	1134	if mobj is None:
	1135	raise ExtractorError('Invalid URL: %s' % url)
	1136	playlist_id = mobj.group(1) or mobj.group(2)
	1137
	1138	# Check if it's a video-specific URL
	1139	query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
	1140	if 'v' in query_dict:
	1141	video_id = query_dict['v'][0]
	1142	if self._downloader.params.get('noplaylist'):
	1143	self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
	1144	return self.url_result(video_id, 'Youtube', video_id=video_id)
	1145	else:
	1146	self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
	1147
	1148	if playlist_id.startswith('RD'):
	1149	# Mixes require a custom extraction process
	1150	return self._extract_mix(playlist_id)
	1151	if playlist_id.startswith('TL'):
	1152	raise ExtractorError('For downloading YouTube.com top lists, use '
	1153	'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
	1154
	1155	url = self._TEMPLATE_URL % playlist_id
	1156	page = self._download_webpage(url, playlist_id)
	1157	more_widget_html = content_html = page
	1158
	1159	# Check if the playlist exists or is private
	1160	if re.search(r'<div class="yt-alert-message">[^<]?(The\|This) playlist (does not exist\|is private)[^<]?</div>', page) is not None:
	1161	raise ExtractorError(
	1162	'The playlist doesn\'t exist or is private, use --username or '
	1163	'--netrc to access it.',
	1164	expected=True)
	1165
	1166	# Extract the video ids from the playlist pages
	1167	ids = []
	1168
	1169	for page_num in itertools.count(1):
	1170	matches = re.finditer(self._VIDEO_RE, content_html)
	1171	# We remove the duplicates and the link with index 0
	1172	# (it's not the first video of the playlist)
	1173	new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
	1174	ids.extend(new_ids)
	1175
	1176	mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
	1177	if not mobj:
	1178	break
	1179
	1180	more = self._download_json(
	1181	'https://youtube.com/%s' % mobj.group('more'), playlist_id,
	1182	'Downloading page #%s' % page_num,
	1183	transform_source=uppercase_escape)
	1184	content_html = more['content_html']
	1185	more_widget_html = more['load_more_widget_html']
	1186
	1187	playlist_title = self._html_search_regex(
	1188	r'(?s)<h1 class="pl-header-title[^"]">\s(.?)\s</h1>',
	1189	page, 'title')
	1190
	1191	url_results = self._ids_to_results(ids)
	1192	return self.playlist_result(url_results, playlist_id, playlist_title)
	1193
	1194
	1195	class YoutubeTopListIE(YoutubePlaylistIE):
	1196	IE_NAME = 'youtube:toplist'
	1197	IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
	1198	' (Example: "yttoplist:music:Top Tracks")')
	1199	_VALID_URL = r'yttoplist:(?P<chann>.?):(?P<title>.?)$'
	1200	_TESTS = [{
	1201	'url': 'yttoplist:music:Trending',
	1202	'playlist_mincount': 5,
	1203	'skip': 'Only works for logged-in users',
	1204	}]
	1205
	1206	def _real_extract(self, url):
	1207	mobj = re.match(self._VALID_URL, url)
	1208	channel = mobj.group('chann')
	1209	title = mobj.group('title')
	1210	query = compat_urllib_parse.urlencode({'title': title})
	1211	channel_page = self._download_webpage(
	1212	'https://www.youtube.com/%s' % channel, title)
	1213	link = self._html_search_regex(
	1214	r'''(?x)
	1215	<a\s+href="([^"]+)".?>\s
	1216	<span\s+class="branded-page-module-title-text">\s*
	1217	<span[^>]>.?%s.*?</span>''' % re.escape(query),
	1218	channel_page, 'list')
	1219	url = compat_urlparse.urljoin('https://www.youtube.com/', link)
	1220
	1221	video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
	1222	ids = []
	1223	# sometimes the webpage doesn't contain the videos
	1224	# retry until we get them
	1225	for i in itertools.count(0):
	1226	msg = 'Downloading Youtube mix'
	1227	if i > 0:
	1228	msg += ', retry #%d' % i
	1229
	1230	webpage = self._download_webpage(url, title, msg)
	1231	ids = orderedSet(re.findall(video_re, webpage))
	1232	if ids:
	1233	break
	1234	url_results = self._ids_to_results(ids)
	1235	return self.playlist_result(url_results, playlist_title=title)
	1236
	1237
	1238	class YoutubeChannelIE(InfoExtractor):
	1239	IE_DESC = 'YouTube.com channels'
	1240	_VALID_URL = r"^(?:https?://)?(?:youtu\.be\|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
	1241	_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
	1242	_MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
	1243	IE_NAME = 'youtube:channel'
	1244	_TESTS = [{
	1245	'note': 'paginated channel',
	1246	'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
	1247	'playlist_mincount': 91,
	1248	}]
	1249
	1250	def extract_videos_from_page(self, page):
	1251	ids_in_page = []
	1252	for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
	1253	if mobj.group(1) not in ids_in_page:
	1254	ids_in_page.append(mobj.group(1))
	1255	return ids_in_page
	1256
	1257	def _real_extract(self, url):
	1258	# Extract channel id
	1259	mobj = re.match(self._VALID_URL, url)
	1260	if mobj is None:
	1261	raise ExtractorError('Invalid URL: %s' % url)
	1262
	1263	# Download channel page
	1264	channel_id = mobj.group(1)
	1265	video_ids = []
	1266	url = 'https://www.youtube.com/channel/%s/videos' % channel_id
	1267	channel_page = self._download_webpage(url, channel_id)
	1268	autogenerated = re.search(r'''(?x)
	1269	class="[^"]*?(?:
	1270	channel-header-autogenerated-label\|
	1271	yt-channel-title-autogenerated
	1272	)[^"]*"''', channel_page) is not None
	1273
	1274	if autogenerated:
	1275	# The videos are contained in a single page
	1276	# the ajax pages can't be used, they are empty
	1277	video_ids = self.extract_videos_from_page(channel_page)
	1278	else:
	1279	# Download all channel pages using the json-based channel_ajax query
	1280	for pagenum in itertools.count(1):
	1281	url = self._MORE_PAGES_URL % (pagenum, channel_id)
	1282	page = self._download_json(
	1283	url, channel_id, note='Downloading page #%s' % pagenum,
	1284	transform_source=uppercase_escape)
	1285
	1286	ids_in_page = self.extract_videos_from_page(page['content_html'])
	1287	video_ids.extend(ids_in_page)
	1288
	1289	if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
	1290	break
	1291
	1292	self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
	1293
	1294	url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
	1295	for video_id in video_ids]
	1296	return self.playlist_result(url_entries, channel_id)
	1297
	1298
	1299	class YoutubeUserIE(InfoExtractor):
	1300	IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
	1301	_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link\|watch\|results)(?:$\|[^a-z_A-Z0-9-])))\|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
	1302	_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
	1303	_GDATA_PAGE_SIZE = 50
	1304	_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
	1305	IE_NAME = 'youtube:user'
	1306
	1307	_TESTS = [{
	1308	'url': 'https://www.youtube.com/user/TheLinuxFoundation',
	1309	'playlist_mincount': 320,
	1310	'info_dict': {
	1311	'title': 'TheLinuxFoundation',
	1312	}
	1313	}, {
	1314	'url': 'ytuser:phihag',
	1315	'only_matching': True,
	1316	}]
	1317
	1318	@classmethod
	1319	def suitable(cls, url):
	1320	# Don't return True if the url can be extracted with other youtube
	1321	# extractor, the regex would is too permissive and it would match.
	1322	other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
	1323	if any(ie.suitable(url) for ie in other_ies): return False
	1324	else: return super(YoutubeUserIE, cls).suitable(url)
	1325
	1326	def _real_extract(self, url):
	1327	# Extract username
	1328	mobj = re.match(self._VALID_URL, url)
	1329	if mobj is None:
	1330	raise ExtractorError('Invalid URL: %s' % url)
	1331
	1332	username = mobj.group(1)
	1333
	1334	# Download video ids using YouTube Data API. Result size per
	1335	# query is limited (currently to 50 videos) so we need to query
	1336	# page by page until there are no video ids - it means we got
	1337	# all of them.
	1338
	1339	def download_page(pagenum):
	1340	start_index = pagenum * self._GDATA_PAGE_SIZE + 1
	1341
	1342	gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
	1343	page = self._download_webpage(
	1344	gdata_url, username,
	1345	'Downloading video ids from %d to %d' % (
	1346	start_index, start_index + self._GDATA_PAGE_SIZE))
	1347
	1348	try:
	1349	response = json.loads(page)
	1350	except ValueError as err:
	1351	raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
	1352	if 'entry' not in response['feed']:
	1353	return
	1354
	1355	# Extract video identifiers
	1356	entries = response['feed']['entry']
	1357	for entry in entries:
	1358	title = entry['title']['$t']
	1359	video_id = entry['id']['$t'].split('/')[-1]
	1360	yield {
	1361	'_type': 'url',
	1362	'url': video_id,
	1363	'ie_key': 'Youtube',
	1364	'id': video_id,
	1365	'title': title,
	1366	}
	1367	url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE)
	1368
	1369	return self.playlist_result(url_results, playlist_title=username)
	1370
	1371
	1372	class YoutubeSearchIE(SearchInfoExtractor):
	1373	IE_DESC = 'YouTube.com searches'
	1374	_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
	1375	_MAX_RESULTS = 1000
	1376	IE_NAME = 'youtube:search'
	1377	_SEARCH_KEY = 'ytsearch'
	1378
	1379	def _get_n_results(self, query, n):
	1380	"""Get a specified number of results for a query"""
	1381
	1382	video_ids = []
	1383	pagenum = 0
	1384	limit = n
	1385	PAGE_SIZE = 50
	1386
	1387	while (PAGE_SIZE * pagenum) < limit:
	1388	result_url = self._API_URL % (
	1389	compat_urllib_parse.quote_plus(query.encode('utf-8')),
	1390	(PAGE_SIZE * pagenum) + 1)
	1391	data_json = self._download_webpage(
	1392	result_url, video_id='query "%s"' % query,
	1393	note='Downloading page %s' % (pagenum + 1),
	1394	errnote='Unable to download API page')
	1395	data = json.loads(data_json)
	1396	api_response = data['data']
	1397
	1398	if 'items' not in api_response:
	1399	raise ExtractorError(
	1400	'[youtube] No video results', expected=True)
	1401
	1402	new_ids = list(video['id'] for video in api_response['items'])
	1403	video_ids += new_ids
	1404
	1405	limit = min(n, api_response['totalItems'])
	1406	pagenum += 1
	1407
	1408	if len(video_ids) > n:
	1409	video_ids = video_ids[:n]
	1410	videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
	1411	for video_id in video_ids]
	1412	return self.playlist_result(videos, query)
	1413
	1414
	1415	class YoutubeSearchDateIE(YoutubeSearchIE):
	1416	IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
	1417	_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
	1418	_SEARCH_KEY = 'ytsearchdate'
	1419	IE_DESC = 'YouTube.com searches, newest videos first'
	1420
	1421
	1422	class YoutubeSearchURLIE(InfoExtractor):
	1423	IE_DESC = 'YouTube.com search URLs'
	1424	IE_NAME = 'youtube:search_url'
	1425	_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]\|$)'
	1426	_TESTS = [{
	1427	'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
	1428	'playlist_mincount': 5,
	1429	'info_dict': {
	1430	'title': 'youtube-dl test video',
	1431	}
	1432	}]
	1433
	1434	def _real_extract(self, url):
	1435	mobj = re.match(self._VALID_URL, url)
	1436	query = compat_urllib_parse.unquote_plus(mobj.group('query'))
	1437
	1438	webpage = self._download_webpage(url, query)
	1439	result_code = self._search_regex(
	1440	r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')
	1441
	1442	part_codes = re.findall(
	1443	r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
	1444	entries = []
	1445	for part_code in part_codes:
	1446	part_title = self._html_search_regex(
	1447	[r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
	1448	part_url_snippet = self._html_search_regex(
	1449	r'(?s)href="([^"]+)"', part_code, 'item URL')
	1450	part_url = compat_urlparse.urljoin(
	1451	'https://www.youtube.com/', part_url_snippet)
	1452	entries.append({
	1453	'_type': 'url',
	1454	'url': part_url,
	1455	'title': part_title,
	1456	})
	1457
	1458	return {
	1459	'_type': 'playlist',
	1460	'entries': entries,
	1461	'title': query,
	1462	}
	1463
	1464
	1465	class YoutubeShowIE(InfoExtractor):
	1466	IE_DESC = 'YouTube.com (multi-season) shows'
	1467	_VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
	1468	IE_NAME = 'youtube:show'
	1469	_TESTS = [{
	1470	'url': 'http://www.youtube.com/show/airdisasters',
	1471	'playlist_mincount': 3,
	1472	'info_dict': {
	1473	'id': 'airdisasters',
	1474	'title': 'Air Disasters',
	1475	}
	1476	}]
	1477
	1478	def _real_extract(self, url):
	1479	mobj = re.match(self._VALID_URL, url)
	1480	playlist_id = mobj.group('id')
	1481	webpage = self._download_webpage(
	1482	url, playlist_id, 'Downloading show webpage')
	1483	# There's one playlist for each season of the show
	1484	m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
	1485	self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
	1486	entries = [
	1487	self.url_result(
	1488	'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
	1489	for season in m_seasons
	1490	]
	1491	title = self._og_search_title(webpage, fatal=False)
	1492
	1493	return {
	1494	'_type': 'playlist',
	1495	'id': playlist_id,
	1496	'title': title,
	1497	'entries': entries,
	1498	}
	1499
	1500
	1501	class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
	1502	"""
	1503	Base class for extractors that fetch info from
	1504	http://www.youtube.com/feed_ajax
	1505	Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
	1506	"""
	1507	_LOGIN_REQUIRED = True
	1508	# use action_load_personal_feed instead of action_load_system_feed
	1509	_PERSONAL_FEED = False
	1510
	1511	@property
	1512	def _FEED_TEMPLATE(self):
	1513	action = 'action_load_system_feed'
	1514	if self._PERSONAL_FEED:
	1515	action = 'action_load_personal_feed'
	1516	return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)
	1517
	1518	@property
	1519	def IE_NAME(self):
	1520	return 'youtube:%s' % self._FEED_NAME
	1521
	1522	def _real_initialize(self):
	1523	self._login()
	1524
	1525	def _real_extract(self, url):
	1526	feed_entries = []
	1527	paging = 0
	1528	for i in itertools.count(1):
	1529	info = self._download_json(self._FEED_TEMPLATE % paging,
	1530	'%s feed' % self._FEED_NAME,
	1531	'Downloading page %s' % i)
	1532	feed_html = info.get('feed_html') or info.get('content_html')
	1533	load_more_widget_html = info.get('load_more_widget_html') or feed_html
	1534	m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
	1535	ids = orderedSet(m.group(1) for m in m_ids)
	1536	feed_entries.extend(
	1537	self.url_result(video_id, 'Youtube', video_id=video_id)
	1538	for video_id in ids)
	1539	mobj = re.search(
	1540	r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
	1541	load_more_widget_html)
	1542	if mobj is None:
	1543	break
	1544	paging = mobj.group('paging')
	1545	return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
	1546
	1547	class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
	1548	IE_DESC = 'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
	1549	_VALID_URL = r'https?://www\.youtube\.com/feed/recommended\|:ytrec(?:ommended)?'
	1550	_FEED_NAME = 'recommended'
	1551	_PLAYLIST_TITLE = 'Youtube Recommended videos'
	1552
	1553	class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
	1554	IE_DESC = 'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
	1555	_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later\|:ytwatchlater'
	1556	_FEED_NAME = 'watch_later'
	1557	_PLAYLIST_TITLE = 'Youtube Watch Later'
	1558	_PERSONAL_FEED = True
	1559
	1560	class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
	1561	IE_DESC = 'Youtube watch history, "ythistory" keyword (requires authentication)'
	1562	_VALID_URL = 'https?://www\.youtube\.com/feed/history\|:ythistory'
	1563	_FEED_NAME = 'history'
	1564	_PERSONAL_FEED = True
	1565	_PLAYLIST_TITLE = 'Youtube Watch History'
	1566
	1567	class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
	1568	IE_NAME = 'youtube:favorites'
	1569	IE_DESC = 'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
	1570	_VALID_URL = r'https?://www\.youtube\.com/my_favorites\|:ytfav(?:ou?rites)?'
	1571	_LOGIN_REQUIRED = True
	1572
	1573	def _real_extract(self, url):
	1574	webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
	1575	playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
	1576	return self.url_result(playlist_id, 'YoutubePlaylist')
	1577
	1578
	1579	class YoutubeSubscriptionsIE(YoutubePlaylistIE):
	1580	IE_NAME = 'youtube:subscriptions'
	1581	IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
	1582	_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions\|:ytsubs(?:criptions)?'
	1583	_TESTS = []
	1584
	1585	def _real_extract(self, url):
	1586	title = 'Youtube Subscriptions'
	1587	page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title)
	1588
	1589	# The extraction process is the same as for playlists, but the regex
	1590	# for the video ids doesn't contain an index
	1591	ids = []
	1592	more_widget_html = content_html = page
	1593
	1594	for page_num in itertools.count(1):
	1595	matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
	1596	new_ids = orderedSet(matches)
	1597	ids.extend(new_ids)
	1598
	1599	mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
	1600	if not mobj:
	1601	break
	1602
	1603	more = self._download_json(
	1604	'https://youtube.com/%s' % mobj.group('more'), title,
	1605	'Downloading page #%s' % page_num,
	1606	transform_source=uppercase_escape)
	1607	content_html = more['content_html']
	1608	more_widget_html = more['load_more_widget_html']
	1609
	1610	return {
	1611	'_type': 'playlist',
	1612	'title': title,
	1613	'entries': self._ids_to_results(ids),
	1614	}
	1615
	1616
	1617	class YoutubeTruncatedURLIE(InfoExtractor):
	1618	IE_NAME = 'youtube:truncated_url'
	1619	IE_DESC = False # Do not list
	1620	_VALID_URL = r'''(?x)
	1621	(?:https?://)?[^/]+/watch\?(?:
	1622	feature=[a-z_]+\|
	1623	annotation_id=annotation_[^&]+
	1624	)?$\|
	1625	(?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
	1626	'''
	1627
	1628	_TESTS = [{
	1629	'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
	1630	'only_matching': True,
	1631	}, {
	1632	'url': 'http://www.youtube.com/watch?',
	1633	'only_matching': True,
	1634	}]
	1635
	1636	def _real_extract(self, url):
	1637	raise ExtractorError(
	1638	'Did you forget to quote the URL? Remember that & is a meta '
	1639	'character in most shells, so you want to put the URL in quotes, '
	1640	'like youtube-dl '
	1641	'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
	1642	' or simply youtube-dl BaW_jenozKc .',
	1643	expected=True)