jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5
	6	import itertools
	7	import json
	8	import os.path
	9	import re
	10	import time
	11	import traceback
	12
	13	from .common import InfoExtractor, SearchInfoExtractor
	14	from .subtitles import SubtitlesInfoExtractor
	15	from ..jsinterp import JSInterpreter
	16	from ..swfinterp import SWFInterpreter
	17	from ..utils import (
	18	compat_chr,
	19	compat_parse_qs,
	20	compat_urllib_parse,
	21	compat_urllib_request,
	22	compat_urlparse,
	23	compat_str,
	24
	25	clean_html,
	26	get_element_by_id,
	27	get_element_by_attribute,
	28	ExtractorError,
	29	int_or_none,
	30	OnDemandPagedList,
	31	unescapeHTML,
	32	unified_strdate,
	33	orderedSet,
	34	uppercase_escape,
	35	)
	36
	37
	38	class YoutubeBaseInfoExtractor(InfoExtractor):
	39	"""Provide base functions for Youtube extractors"""
	40	_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
	41	_TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
	42	_NETRC_MACHINE = 'youtube'
	43	# If True it will raise an error if no login info is provided
	44	_LOGIN_REQUIRED = False
	45
	46	def _set_language(self):
	47	self._set_cookie(
	48	'.youtube.com', 'PREF', 'f1=50000000&hl=en',
	49	# YouTube sets the expire time to about two months
	50	expire_time=time.time() + 2 * 30 * 24 * 3600)
	51
	52	def _login(self):
	53	"""
	54	Attempt to log in to YouTube.
	55	True is returned if successful or skipped.
	56	False is returned if login failed.
	57
	58	If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
	59	"""
	60	(username, password) = self._get_login_info()
	61	# No authentication to be performed
	62	if username is None:
	63	if self._LOGIN_REQUIRED:
	64	raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
	65	return True
	66
	67	login_page = self._download_webpage(
	68	self._LOGIN_URL, None,
	69	note='Downloading login page',
	70	errnote='unable to fetch login page', fatal=False)
	71	if login_page is False:
	72	return
	73
	74	galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
	75	login_page, 'Login GALX parameter')
	76
	77	# Log in
	78	login_form_strs = {
	79	'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
	80	'Email': username,
	81	'GALX': galx,
	82	'Passwd': password,
	83
	84	'PersistentCookie': 'yes',
	85	'_utf8': '霱',
	86	'bgresponse': 'js_disabled',
	87	'checkConnection': '',
	88	'checkedDomains': 'youtube',
	89	'dnConn': '',
	90	'pstMsg': '0',
	91	'rmShown': '1',
	92	'secTok': '',
	93	'signIn': 'Sign in',
	94	'timeStmp': '',
	95	'service': 'youtube',
	96	'uilel': '3',
	97	'hl': 'en_US',
	98	}
	99
	100	# Convert to UTF-8 before urlencode because Python 2.x's urlencode
	101	# chokes on unicode
	102	login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
	103	login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
	104
	105	req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
	106	login_results = self._download_webpage(
	107	req, None,
	108	note='Logging in', errnote='unable to log in', fatal=False)
	109	if login_results is False:
	110	return False
	111
	112	if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
	113	raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
	114
	115	# Two-Factor
	116	# TODO add SMS and phone call support - these require making a request and then prompting the user
	117
	118	if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
	119	tfa_code = self._get_tfa_info()
	120
	121	if tfa_code is None:
	122	self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
	123	self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
	124	return False
	125
	126	# Unlike the first login form, secTok and timeStmp are both required for the TFA form
	127
	128	match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M \| re.U)
	129	if match is None:
	130	self._downloader.report_warning('Failed to get secTok - did the page structure change?')
	131	secTok = match.group(1)
	132	match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M \| re.U)
	133	if match is None:
	134	self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
	135	timeStmp = match.group(1)
	136
	137	tfa_form_strs = {
	138	'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
	139	'smsToken': '',
	140	'smsUserPin': tfa_code,
	141	'smsVerifyPin': 'Verify',
	142
	143	'PersistentCookie': 'yes',
	144	'checkConnection': '',
	145	'checkedDomains': 'youtube',
	146	'pstMsg': '1',
	147	'secTok': secTok,
	148	'timeStmp': timeStmp,
	149	'service': 'youtube',
	150	'hl': 'en_US',
	151	}
	152	tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
	153	tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
	154
	155	tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
	156	tfa_results = self._download_webpage(
	157	tfa_req, None,
	158	note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
	159
	160	if tfa_results is False:
	161	return False
	162
	163	if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
	164	self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
	165	return False
	166	if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
	167	self._downloader.report_warning('unable to log in - did the page structure change?')
	168	return False
	169	if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
	170	self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
	171	return False
	172
	173	if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
	174	self._downloader.report_warning('unable to log in: bad username or password')
	175	return False
	176	return True
	177
	178	def _real_initialize(self):
	179	if self._downloader is None:
	180	return
	181	self._set_language()
	182	if not self._login():
	183	return
	184
	185
	186	class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
	187	IE_DESC = 'YouTube.com'
	188	_VALID_URL = r"""(?x)^
	189	(
	190	(?:https?://\|//) # http(s):// or protocol-independent URL
	191	(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/\|
	192	(?:www\.)?deturl\.com/www\.youtube\.com/\|
	193	(?:www\.)?pwnyoutube\.com/\|
	194	(?:www\.)?yourepeat\.com/\|
	195	tube\.majestyc\.net/\|
	196	youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
	197	(?:.*?\#/)? # handle anchor (#/) redirect urls
	198	(?: # the various things that can precede the ID:
	199	(?:(?:v\|embed\|e)/(?!videoseries)) # v/ or embed/ or e/
	200	\|(?: # or the v= param in all its forms
	201	(?:(?:watch\|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup\|.php) or nothing (like /?v=xxxx)
	202	(?:\?\|\#!?) # the params delimiter ? or # or #!
	203	(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
	204	v=
	205	)
	206	))
	207	\|youtu\.be/ # just youtu.be/xxxx
	208	\|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
	209	)
	210	)? # all until now is optional -> you can pass the naked ID
	211	([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
	212	(?!.*?&list=) # combined list/video URLs are handled by the playlist IE
	213	(?(1).+)? # if we found the ID, everything can follow
	214	$"""
	215	_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
	216	_formats = {
	217	'5': {'ext': 'flv', 'width': 400, 'height': 240},
	218	'6': {'ext': 'flv', 'width': 450, 'height': 270},
	219	'13': {'ext': '3gp'},
	220	'17': {'ext': '3gp', 'width': 176, 'height': 144},
	221	'18': {'ext': 'mp4', 'width': 640, 'height': 360},
	222	'22': {'ext': 'mp4', 'width': 1280, 'height': 720},
	223	'34': {'ext': 'flv', 'width': 640, 'height': 360},
	224	'35': {'ext': 'flv', 'width': 854, 'height': 480},
	225	'36': {'ext': '3gp', 'width': 320, 'height': 240},
	226	'37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
	227	'38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
	228	'43': {'ext': 'webm', 'width': 640, 'height': 360},
	229	'44': {'ext': 'webm', 'width': 854, 'height': 480},
	230	'45': {'ext': 'webm', 'width': 1280, 'height': 720},
	231	'46': {'ext': 'webm', 'width': 1920, 'height': 1080},
	232
	233
	234	# 3d videos
	235	'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
	236	'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
	237	'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
	238	'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
	239	'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
	240	'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
	241	'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
	242
	243	# Apple HTTP Live Streaming
	244	'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
	245	'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
	246	'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
	247	'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
	248	'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
	249	'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
	250	'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
	251
	252	# DASH mp4 video
	253	'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	254	'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	255	'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	256	'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	257	'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	258	'138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	259	'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	260	'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	261	'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
	262	'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
	263	'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
	264
	265	# Dash mp4 audio
	266	'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
	267	'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
	268	'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
	269
	270	# Dash webm
	271	'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	272	'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	273	'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	274	'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	275	'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	276	'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	277	'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'VP9'},
	278	'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	279	'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	280	'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	281	'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	282	'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	283	'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	284	'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	285	'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	286	'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	287	'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
	288	'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
	289	'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
	290
	291	# Dash webm audio
	292	'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
	293	'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
	294
	295	# Dash webm audio with opus inside
	296	'249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
	297	'250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
	298	'251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
	299
	300	# RTMP (unnamed)
	301	'_rtmp': {'protocol': 'rtmp'},
	302	}
	303
	304	IE_NAME = 'youtube'
	305	_TESTS = [
	306	{
	307	'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
	308	'info_dict': {
	309	'id': 'BaW_jenozKc',
	310	'ext': 'mp4',
	311	'title': 'youtube-dl test video "\'/\\ä↭𝕐',
	312	'uploader': 'Philipp Hagemeister',
	313	'uploader_id': 'phihag',
	314	'upload_date': '20121002',
	315	'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
	316	'categories': ['Science & Technology'],
	317	'like_count': int,
	318	'dislike_count': int,
	319	}
	320	},
	321	{
	322	'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
	323	'note': 'Test generic use_cipher_signature video (#897)',
	324	'info_dict': {
	325	'id': 'UxxajLWwzqY',
	326	'ext': 'mp4',
	327	'upload_date': '20120506',
	328	'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
	329	'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
	330	'uploader': 'Icona Pop',
	331	'uploader_id': 'IconaPop',
	332	}
	333	},
	334	{
	335	'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
	336	'note': 'Test VEVO video with age protection (#956)',
	337	'info_dict': {
	338	'id': '07FYdnEawAQ',
	339	'ext': 'mp4',
	340	'upload_date': '20130703',
	341	'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
	342	'description': 'md5:64249768eec3bc4276236606ea996373',
	343	'uploader': 'justintimberlakeVEVO',
	344	'uploader_id': 'justintimberlakeVEVO',
	345	}
	346	},
	347	{
	348	'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
	349	'note': 'Embed-only video (#1746)',
	350	'info_dict': {
	351	'id': 'yZIXLfi8CZQ',
	352	'ext': 'mp4',
	353	'upload_date': '20120608',
	354	'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
	355	'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
	356	'uploader': 'SET India',
	357	'uploader_id': 'setindia'
	358	}
	359	},
	360	{
	361	'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
	362	'note': '256k DASH audio (format 141) via DASH manifest',
	363	'info_dict': {
	364	'id': 'a9LDPn-MO4I',
	365	'ext': 'm4a',
	366	'upload_date': '20121002',
	367	'uploader_id': '8KVIDEO',
	368	'description': '',
	369	'uploader': '8KVIDEO',
	370	'title': 'UHDTV TEST 8K VIDEO.mp4'
	371	},
	372	'params': {
	373	'youtube_include_dash_manifest': True,
	374	'format': '141',
	375	},
	376	},
	377	# DASH manifest with encrypted signature
	378	{
	379	'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
	380	'info_dict': {
	381	'id': 'IB3lcPjvWLA',
	382	'ext': 'm4a',
	383	'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
	384	'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
	385	'uploader': 'AfrojackVEVO',
	386	'uploader_id': 'AfrojackVEVO',
	387	'upload_date': '20131011',
	388	},
	389	'params': {
	390	'youtube_include_dash_manifest': True,
	391	'format': '141',
	392	},
	393	},
	394	# Controversy video
	395	{
	396	'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
	397	'info_dict': {
	398	'id': 'T4XJQO3qol8',
	399	'ext': 'mp4',
	400	'upload_date': '20100909',
	401	'uploader': 'The Amazing Atheist',
	402	'uploader_id': 'TheAmazingAtheist',
	403	'title': 'Burning Everyone\'s Koran',
	404	'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
	405	}
	406	},
	407	# Normal age-gate video (No vevo, embed allowed)
	408	{
	409	'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
	410	'info_dict': {
	411	'id': 'HtVdAasjOgU',
	412	'ext': 'mp4',
	413	'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
	414	'description': 'md5:eca57043abae25130f58f655ad9a7771',
	415	'uploader': 'The Witcher',
	416	'uploader_id': 'WitcherGame',
	417	'upload_date': '20140605',
	418	},
	419	},
	420	# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
	421	{
	422	'url': '__2ABJjxzNo',
	423	'info_dict': {
	424	'id': '__2ABJjxzNo',
	425	'ext': 'mp4',
	426	'upload_date': '20100430',
	427	'uploader_id': 'deadmau5',
	428	'description': 'md5:12c56784b8032162bb936a5f76d55360',
	429	'uploader': 'deadmau5',
	430	'title': 'Deadmau5 - Some Chords (HD)',
	431	},
	432	'expected_warnings': [
	433	'DASH manifest missing',
	434	]
	435	}
	436	]
	437
	438	def __init__(self, args, *kwargs):
	439	super(YoutubeIE, self).__init__(args, *kwargs)
	440	self._player_cache = {}
	441
	442	def report_video_info_webpage_download(self, video_id):
	443	"""Report attempt to download video info webpage."""
	444	self.to_screen('%s: Downloading video info webpage' % video_id)
	445
	446	def report_information_extraction(self, video_id):
	447	"""Report attempt to extract video information."""
	448	self.to_screen('%s: Extracting video information' % video_id)
	449
	450	def report_unavailable_format(self, video_id, format):
	451	"""Report extracted video URL."""
	452	self.to_screen('%s: Format %s not available' % (video_id, format))
	453
	454	def report_rtmp_download(self):
	455	"""Indicate the download will use the RTMP protocol."""
	456	self.to_screen('RTMP download detected')
	457
	458	def _signature_cache_id(self, example_sig):
	459	""" Return a string representation of a signature """
	460	return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
	461
	462	def _extract_signature_function(self, video_id, player_url, example_sig):
	463	id_m = re.match(
	464	r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3\|/html5player)?\.(?P<ext>[a-z]+)$',
	465	player_url)
	466	if not id_m:
	467	raise ExtractorError('Cannot identify player %r' % player_url)
	468	player_type = id_m.group('ext')
	469	player_id = id_m.group('id')
	470
	471	# Read from filesystem cache
	472	func_id = '%s_%s_%s' % (
	473	player_type, player_id, self._signature_cache_id(example_sig))
	474	assert os.path.basename(func_id) == func_id
	475
	476	cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
	477	if cache_spec is not None:
	478	return lambda s: ''.join(s[i] for i in cache_spec)
	479
	480	if player_type == 'js':
	481	code = self._download_webpage(
	482	player_url, video_id,
	483	note='Downloading %s player %s' % (player_type, player_id),
	484	errnote='Download of %s failed' % player_url)
	485	res = self._parse_sig_js(code)
	486	elif player_type == 'swf':
	487	urlh = self._request_webpage(
	488	player_url, video_id,
	489	note='Downloading %s player %s' % (player_type, player_id),
	490	errnote='Download of %s failed' % player_url)
	491	code = urlh.read()
	492	res = self._parse_sig_swf(code)
	493	else:
	494	assert False, 'Invalid player type %r' % player_type
	495
	496	if cache_spec is None:
	497	test_string = ''.join(map(compat_chr, range(len(example_sig))))
	498	cache_res = res(test_string)
	499	cache_spec = [ord(c) for c in cache_res]
	500
	501	self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
	502	return res
	503
	504	def _print_sig_code(self, func, example_sig):
	505	def gen_sig_code(idxs):
	506	def _genslice(start, end, step):
	507	starts = '' if start == 0 else str(start)
	508	ends = (':%d' % (end + step)) if end + step >= 0 else ':'
	509	steps = '' if step == 1 else (':%d' % step)
	510	return 's[%s%s%s]' % (starts, ends, steps)
	511
	512	step = None
	513	start = '(Never used)' # Quelch pyflakes warnings - start will be
	514	# set as soon as step is set
	515	for i, prev in zip(idxs[1:], idxs[:-1]):
	516	if step is not None:
	517	if i - prev == step:
	518	continue
	519	yield _genslice(start, prev, step)
	520	step = None
	521	continue
	522	if i - prev in [-1, 1]:
	523	step = i - prev
	524	start = prev
	525	continue
	526	else:
	527	yield 's[%d]' % prev
	528	if step is None:
	529	yield 's[%d]' % i
	530	else:
	531	yield _genslice(start, i, step)
	532
	533	test_string = ''.join(map(compat_chr, range(len(example_sig))))
	534	cache_res = func(test_string)
	535	cache_spec = [ord(c) for c in cache_res]
	536	expr_code = ' + '.join(gen_sig_code(cache_spec))
	537	signature_id_tuple = '(%s)' % (
	538	', '.join(compat_str(len(p)) for p in example_sig.split('.')))
	539	code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
	540	' return %s\n') % (signature_id_tuple, expr_code)
	541	self.to_screen('Extracted signature function:\n' + code)
	542
	543	def _parse_sig_js(self, jscode):
	544	funcname = self._search_regex(
	545	r'\.sig\\|\\|([a-zA-Z0-9]+)\(', jscode,
	546	'Initial JS player signature function name')
	547
	548	jsi = JSInterpreter(jscode)
	549	initial_function = jsi.extract_function(funcname)
	550	return lambda s: initial_function([s])
	551
	552	def _parse_sig_swf(self, file_contents):
	553	swfi = SWFInterpreter(file_contents)
	554	TARGET_CLASSNAME = 'SignatureDecipher'
	555	searched_class = swfi.extract_class(TARGET_CLASSNAME)
	556	initial_function = swfi.extract_function(searched_class, 'decipher')
	557	return lambda s: initial_function([s])
	558
	559	def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
	560	"""Turn the encrypted s field into a working signature"""
	561
	562	if player_url is None:
	563	raise ExtractorError('Cannot decrypt signature without player_url')
	564
	565	if player_url.startswith('//'):
	566	player_url = 'https:' + player_url
	567	try:
	568	player_id = (player_url, self._signature_cache_id(s))
	569	if player_id not in self._player_cache:
	570	func = self._extract_signature_function(
	571	video_id, player_url, s
	572	)
	573	self._player_cache[player_id] = func
	574	func = self._player_cache[player_id]
	575	if self._downloader.params.get('youtube_print_sig_code'):
	576	self._print_sig_code(func, s)
	577	return func(s)
	578	except Exception as e:
	579	tb = traceback.format_exc()
	580	raise ExtractorError(
	581	'Signature extraction failed: ' + tb, cause=e)
	582
	583	def _get_available_subtitles(self, video_id, webpage):
	584	try:
	585	sub_list = self._download_webpage(
	586	'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
	587	video_id, note=False)
	588	except ExtractorError as err:
	589	self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
	590	return {}
	591	lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
	592
	593	sub_lang_list = {}
	594	for l in lang_list:
	595	lang = l[1]
	596	if lang in sub_lang_list:
	597	continue
	598	params = compat_urllib_parse.urlencode({
	599	'lang': lang,
	600	'v': video_id,
	601	'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
	602	'name': unescapeHTML(l[0]).encode('utf-8'),
	603	})
	604	url = 'https://www.youtube.com/api/timedtext?' + params
	605	sub_lang_list[lang] = url
	606	if not sub_lang_list:
	607	self._downloader.report_warning('video doesn\'t have subtitles')
	608	return {}
	609	return sub_lang_list
	610
	611	def _get_available_automatic_caption(self, video_id, webpage):
	612	"""We need the webpage for getting the captions url, pass it as an
	613	argument to speed up the process."""
	614	sub_format = self._downloader.params.get('subtitlesformat', 'srt')
	615	self.to_screen('%s: Looking for automatic captions' % video_id)
	616	mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
	617	err_msg = 'Couldn\'t find automatic captions for %s' % video_id
	618	if mobj is None:
	619	self._downloader.report_warning(err_msg)
	620	return {}
	621	player_config = json.loads(mobj.group(1))
	622	try:
	623	args = player_config['args']
	624	caption_url = args['ttsurl']
	625	timestamp = args['timestamp']
	626	# We get the available subtitles
	627	list_params = compat_urllib_parse.urlencode({
	628	'type': 'list',
	629	'tlangs': 1,
	630	'asrs': 1,
	631	})
	632	list_url = caption_url + '&' + list_params
	633	caption_list = self._download_xml(list_url, video_id)
	634	original_lang_node = caption_list.find('track')
	635	if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr':
	636	self._downloader.report_warning('Video doesn\'t have automatic captions')
	637	return {}
	638	original_lang = original_lang_node.attrib['lang_code']
	639
	640	sub_lang_list = {}
	641	for lang_node in caption_list.findall('target'):
	642	sub_lang = lang_node.attrib['lang_code']
	643	params = compat_urllib_parse.urlencode({
	644	'lang': original_lang,
	645	'tlang': sub_lang,
	646	'fmt': sub_format,
	647	'ts': timestamp,
	648	'kind': 'asr',
	649	})
	650	sub_lang_list[sub_lang] = caption_url + '&' + params
	651	return sub_lang_list
	652	# An extractor error can be raise by the download process if there are
	653	# no automatic captions but there are subtitles
	654	except (KeyError, ExtractorError):
	655	self._downloader.report_warning(err_msg)
	656	return {}
	657
	658	@classmethod
	659	def extract_id(cls, url):
	660	mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
	661	if mobj is None:
	662	raise ExtractorError('Invalid URL: %s' % url)
	663	video_id = mobj.group(2)
	664	return video_id
	665
	666	def _extract_from_m3u8(self, manifest_url, video_id):
	667	url_map = {}
	668
	669	def _get_urls(_manifest):
	670	lines = _manifest.split('\n')
	671	urls = filter(lambda l: l and not l.startswith('#'),
	672	lines)
	673	return urls
	674	manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
	675	formats_urls = _get_urls(manifest)
	676	for format_url in formats_urls:
	677	itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
	678	url_map[itag] = format_url
	679	return url_map
	680
	681	def _extract_annotations(self, video_id):
	682	url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
	683	return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
	684
	685	def _parse_dash_manifest(
	686	self, video_id, dash_manifest_url, player_url, age_gate):
	687	def decrypt_sig(mobj):
	688	s = mobj.group(1)
	689	dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
	690	return '/signature/%s' % dec_s
	691	dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
	692	dash_doc = self._download_xml(
	693	dash_manifest_url, video_id,
	694	note='Downloading DASH manifest',
	695	errnote='Could not download DASH manifest')
	696
	697	formats = []
	698	for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
	699	url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
	700	if url_el is None:
	701	continue
	702	format_id = r.attrib['id']
	703	video_url = url_el.text
	704	filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
	705	f = {
	706	'format_id': format_id,
	707	'url': video_url,
	708	'width': int_or_none(r.attrib.get('width')),
	709	'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
	710	'asr': int_or_none(r.attrib.get('audioSamplingRate')),
	711	'filesize': filesize,
	712	'fps': int_or_none(r.attrib.get('frameRate')),
	713	}
	714	try:
	715	existing_format = next(
	716	fo for fo in formats
	717	if fo['format_id'] == format_id)
	718	except StopIteration:
	719	f.update(self._formats.get(format_id, {}))
	720	formats.append(f)
	721	else:
	722	existing_format.update(f)
	723	return formats
	724
	725	def _real_extract(self, url):
	726	proto = (
	727	'http' if self._downloader.params.get('prefer_insecure', False)
	728	else 'https')
	729
	730	# Extract original video URL from URL with redirection, like age verification, using next_url parameter
	731	mobj = re.search(self._NEXT_URL_RE, url)
	732	if mobj:
	733	url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
	734	video_id = self.extract_id(url)
	735
	736	# Get video webpage
	737	url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
	738	video_webpage = self._download_webpage(url, video_id)
	739
	740	# Attempt to extract SWF player URL
	741	mobj = re.search(r'swfConfig.?"(https?:\\/\\/.?watch.?-.?\.swf)"', video_webpage)
	742	if mobj is not None:
	743	player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
	744	else:
	745	player_url = None
	746
	747	# Get video info
	748	if re.search(r'player-age-gate-content">', video_webpage) is not None:
	749	age_gate = True
	750	# We simulate the access to the video from www.youtube.com/v/{video_id}
	751	# this can be viewed without login into Youtube
	752	data = compat_urllib_parse.urlencode({
	753	'video_id': video_id,
	754	'eurl': 'https://youtube.googleapis.com/v/' + video_id,
	755	'sts': self._search_regex(
	756	r'"sts"\s:\s(\d+)', video_webpage, 'sts', default=''),
	757	})
	758	video_info_url = proto + '://www.youtube.com/get_video_info?' + data
	759	video_info_webpage = self._download_webpage(
	760	video_info_url, video_id,
	761	note='Refetching age-gated info webpage',
	762	errnote='unable to download video info webpage')
	763	video_info = compat_parse_qs(video_info_webpage)
	764	else:
	765	age_gate = False
	766	try:
	767	# Try looking directly into the video webpage
	768	mobj = re.search(r';ytplayer\.config\s=\s({.*?});', video_webpage)
	769	if not mobj:
	770	raise ValueError('Could not find ytplayer.config') # caught below
	771	json_code = uppercase_escape(mobj.group(1))
	772	ytplayer_config = json.loads(json_code)
	773	args = ytplayer_config['args']
	774	# Convert to the same format returned by compat_parse_qs
	775	video_info = dict((k, [v]) for k, v in args.items())
	776	if 'url_encoded_fmt_stream_map' not in args:
	777	raise ValueError('No stream_map present') # caught below
	778	except ValueError:
	779	# We fallback to the get_video_info pages (used by the embed page)
	780	self.report_video_info_webpage_download(video_id)
	781	for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
	782	video_info_url = (
	783	'%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
	784	% (proto, video_id, el_type))
	785	video_info_webpage = self._download_webpage(
	786	video_info_url,
	787	video_id, note=False,
	788	errnote='unable to download video info webpage')
	789	video_info = compat_parse_qs(video_info_webpage)
	790	if 'token' in video_info:
	791	break
	792	if 'token' not in video_info:
	793	if 'reason' in video_info:
	794	raise ExtractorError(
	795	'YouTube said: %s' % video_info['reason'][0],
	796	expected=True, video_id=video_id)
	797	else:
	798	raise ExtractorError(
	799	'"token" parameter not in video info for unknown reason',
	800	video_id=video_id)
	801
	802	if 'view_count' in video_info:
	803	view_count = int(video_info['view_count'][0])
	804	else:
	805	view_count = None
	806
	807	# Check for "rental" videos
	808	if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
	809	raise ExtractorError('"rental" videos not supported')
	810
	811	# Start extracting information
	812	self.report_information_extraction(video_id)
	813
	814	# uploader
	815	if 'author' not in video_info:
	816	raise ExtractorError('Unable to extract uploader name')
	817	video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
	818
	819	# uploader_id
	820	video_uploader_id = None
	821	mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user\|channel)/([^"]+)">', video_webpage)
	822	if mobj is not None:
	823	video_uploader_id = mobj.group(1)
	824	else:
	825	self._downloader.report_warning('unable to extract uploader nickname')
	826
	827	# title
	828	if 'title' in video_info:
	829	video_title = video_info['title'][0]
	830	else:
	831	self._downloader.report_warning('Unable to extract video title')
	832	video_title = '_'
	833
	834	# thumbnail image
	835	# We try first to get a high quality image:
	836	m_thumb = re.search(r'<span itemprop="thumbnail".?href="(.?)">',
	837	video_webpage, re.DOTALL)
	838	if m_thumb is not None:
	839	video_thumbnail = m_thumb.group(1)
	840	elif 'thumbnail_url' not in video_info:
	841	self._downloader.report_warning('unable to extract video thumbnail')
	842	video_thumbnail = None
	843	else: # don't panic if we can't find it
	844	video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
	845
	846	# upload date
	847	upload_date = None
	848	mobj = re.search(r'(?s)id="eow-date.?>(.?)</span>', video_webpage)
	849	if mobj is None:
	850	mobj = re.search(
	851	r'(?s)id="watch-uploader-info".?>.?(?:Published\|Uploaded\|Streamed live) on (.*?)</strong>',
	852	video_webpage)
	853	if mobj is not None:
	854	upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
	855	upload_date = unified_strdate(upload_date)
	856
	857	m_cat_container = self._search_regex(
	858	r'(?s)<h4[^>]>\sCategory\s</h4>\s<ul[^>]>(.?)</ul>',
	859	video_webpage, 'categories', fatal=False)
	860	if m_cat_container:
	861	category = self._html_search_regex(
	862	r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
	863	default=None)
	864	video_categories = None if category is None else [category]
	865	else:
	866	video_categories = None
	867
	868	# description
	869	video_description = get_element_by_id("eow-description", video_webpage)
	870	if video_description:
	871	video_description = re.sub(r'''(?x)
	872	<a\s+
	873	(?:[a-zA-Z-]+="[^"]+"\s+)*?
	874	title="([^"]+)"\s+
	875	(?:[a-zA-Z-]+="[^"]+"\s+)*?
	876	class="yt-uix-redirect-link"\s*>
	877	[^<]+
	878	</a>
	879	''', r'\1', video_description)
	880	video_description = clean_html(video_description)
	881	else:
	882	fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
	883	if fd_mobj:
	884	video_description = unescapeHTML(fd_mobj.group(1))
	885	else:
	886	video_description = ''
	887
	888	def _extract_count(count_name):
	889	count = self._search_regex(
	890	r'id="watch-%s"[^>]>.?([\d,]+)\s*</span>' % re.escape(count_name),
	891	video_webpage, count_name, default=None)
	892	if count is not None:
	893	return int(count.replace(',', ''))
	894	return None
	895	like_count = _extract_count('like')
	896	dislike_count = _extract_count('dislike')
	897
	898	# subtitles
	899	video_subtitles = self.extract_subtitles(video_id, video_webpage)
	900
	901	if self._downloader.params.get('listsubtitles', False):
	902	self._list_available_subtitles(video_id, video_webpage)
	903	return
	904
	905	if 'length_seconds' not in video_info:
	906	self._downloader.report_warning('unable to extract video duration')
	907	video_duration = None
	908	else:
	909	video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
	910
	911	# annotations
	912	video_annotations = None
	913	if self._downloader.params.get('writeannotations', False):
	914	video_annotations = self._extract_annotations(video_id)
	915
	916	def _map_to_format_list(urlmap):
	917	formats = []
	918	for itag, video_real_url in urlmap.items():
	919	dct = {
	920	'format_id': itag,
	921	'url': video_real_url,
	922	'player_url': player_url,
	923	}
	924	if itag in self._formats:
	925	dct.update(self._formats[itag])
	926	formats.append(dct)
	927	return formats
	928
	929	if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
	930	self.report_rtmp_download()
	931	formats = [{
	932	'format_id': '_rtmp',
	933	'protocol': 'rtmp',
	934	'url': video_info['conn'][0],
	935	'player_url': player_url,
	936	}]
	937	elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
	938	encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
	939	if 'rtmpe%3Dyes' in encoded_url_map:
	940	raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
	941	url_map = {}
	942	for url_data_str in encoded_url_map.split(','):
	943	url_data = compat_parse_qs(url_data_str)
	944	if 'itag' not in url_data or 'url' not in url_data:
	945	continue
	946	format_id = url_data['itag'][0]
	947	url = url_data['url'][0]
	948
	949	if 'sig' in url_data:
	950	url += '&signature=' + url_data['sig'][0]
	951	elif 's' in url_data:
	952	encrypted_sig = url_data['s'][0]
	953
	954	if not age_gate:
	955	jsplayer_url_json = self._search_regex(
	956	r'"assets":.+?"js":\s*("[^"]+")',
	957	video_webpage, 'JS player URL')
	958	player_url = json.loads(jsplayer_url_json)
	959	if player_url is None:
	960	player_url_json = self._search_regex(
	961	r'ytplayer\.config.?"url"\s:\s*("[^"]+")',
	962	video_webpage, 'age gate player URL')
	963	player_url = json.loads(player_url_json)
	964
	965	if self._downloader.params.get('verbose'):
	966	if player_url is None:
	967	player_version = 'unknown'
	968	player_desc = 'unknown'
	969	else:
	970	if player_url.endswith('swf'):
	971	player_version = self._search_regex(
	972	r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
	973	'flash player', fatal=False)
	974	player_desc = 'flash player %s' % player_version
	975	else:
	976	player_version = self._search_regex(
	977	r'html5player-([^/]+?)(?:/html5player)?\.js',
	978	player_url,
	979	'html5 player', fatal=False)
	980	player_desc = 'html5 player %s' % player_version
	981
	982	parts_sizes = self._signature_cache_id(encrypted_sig)
	983	self.to_screen('{%s} signature length %s, %s' %
	984	(format_id, parts_sizes, player_desc))
	985
	986	signature = self._decrypt_signature(
	987	encrypted_sig, video_id, player_url, age_gate)
	988	url += '&signature=' + signature
	989	if 'ratebypass' not in url:
	990	url += '&ratebypass=yes'
	991	url_map[format_id] = url
	992	formats = _map_to_format_list(url_map)
	993	elif video_info.get('hlsvp'):
	994	manifest_url = video_info['hlsvp'][0]
	995	url_map = self._extract_from_m3u8(manifest_url, video_id)
	996	formats = _map_to_format_list(url_map)
	997	else:
	998	raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
	999
	1000	# Look for the DASH manifest
	1001	if self._downloader.params.get('youtube_include_dash_manifest', True):
	1002	dash_mpd = video_info.get('dashmpd')
	1003	if not dash_mpd:
	1004	self.report_warning('%s: DASH manifest missing' % video_id)
	1005	else:
	1006	dash_manifest_url = dash_mpd[0]
	1007	try:
	1008	dash_formats = self._parse_dash_manifest(
	1009	video_id, dash_manifest_url, player_url, age_gate)
	1010	except (ExtractorError, KeyError) as e:
	1011	self.report_warning(
	1012	'Skipping DASH manifest: %r' % e, video_id)
	1013	else:
	1014	formats.extend(dash_formats)
	1015
	1016	self._sort_formats(formats)
	1017
	1018	return {
	1019	'id': video_id,
	1020	'uploader': video_uploader,
	1021	'uploader_id': video_uploader_id,
	1022	'upload_date': upload_date,
	1023	'title': video_title,
	1024	'thumbnail': video_thumbnail,
	1025	'description': video_description,
	1026	'categories': video_categories,
	1027	'subtitles': video_subtitles,
	1028	'duration': video_duration,
	1029	'age_limit': 18 if age_gate else 0,
	1030	'annotations': video_annotations,
	1031	'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
	1032	'view_count': view_count,
	1033	'like_count': like_count,
	1034	'dislike_count': dislike_count,
	1035	'formats': formats,
	1036	}
	1037
	1038
	1039	class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
	1040	IE_DESC = 'YouTube.com playlists'
	1041	_VALID_URL = r"""(?x)(?:
	1042	(?:https?://)?
	1043	(?:\w+\.)?
	1044	youtube\.com/
	1045	(?:
	1046	(?:course\|view_play_list\|my_playlists\|artist\|playlist\|watch\|embed/videoseries)
	1047	\? (?:.?&)? (?:p\|a\|list)=
	1048	\| p/
	1049	)
	1050	(
	1051	(?:PL\|LL\|EC\|UU\|FL\|RD)?[0-9A-Za-z-_]{10,}
	1052	# Top tracks, they can also include dots
	1053	\|(?:MC)[\w\.]*
	1054	)
	1055	.*
	1056	\|
	1057	((?:PL\|LL\|EC\|UU\|FL\|RD)[0-9A-Za-z-_]{10,})
	1058	)"""
	1059	_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
	1060	_MORE_PAGES_INDICATOR = r'data-link-type="next"'
	1061	_VIDEO_RE = r'href="\s/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]?index=(?P<index>\d+)'
	1062	IE_NAME = 'youtube:playlist'
	1063	_TESTS = [{
	1064	'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
	1065	'info_dict': {
	1066	'title': 'ytdl test PL',
	1067	'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
	1068	},
	1069	'playlist_count': 3,
	1070	}, {
	1071	'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
	1072	'info_dict': {
	1073	'title': 'YDL_Empty_List',
	1074	},
	1075	'playlist_count': 0,
	1076	}, {
	1077	'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
	1078	'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
	1079	'info_dict': {
	1080	'title': '29C3: Not my department',
	1081	},
	1082	'playlist_count': 95,
	1083	}, {
	1084	'note': 'issue #673',
	1085	'url': 'PLBB231211A4F62143',
	1086	'info_dict': {
	1087	'title': '[OLD]Team Fortress 2 (Class-based LP)',
	1088	},
	1089	'playlist_mincount': 26,
	1090	}, {
	1091	'note': 'Large playlist',
	1092	'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
	1093	'info_dict': {
	1094	'title': 'Uploads from Cauchemar',
	1095	},
	1096	'playlist_mincount': 799,
	1097	}, {
	1098	'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
	1099	'info_dict': {
	1100	'title': 'YDL_safe_search',
	1101	},
	1102	'playlist_count': 2,
	1103	}, {
	1104	'note': 'embedded',
	1105	'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
	1106	'playlist_count': 4,
	1107	'info_dict': {
	1108	'title': 'JODA15',
	1109	}
	1110	}, {
	1111	'note': 'Embedded SWF player',
	1112	'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
	1113	'playlist_count': 4,
	1114	'info_dict': {
	1115	'title': 'JODA7',
	1116	}
	1117	}]
	1118
	1119	def _real_initialize(self):
	1120	self._login()
	1121
	1122	def _ids_to_results(self, ids):
	1123	return [
	1124	self.url_result(vid_id, 'Youtube', video_id=vid_id)
	1125	for vid_id in ids]
	1126
	1127	def _extract_mix(self, playlist_id):
	1128	# The mixes are generated from a a single video
	1129	# the id of the playlist is just 'RD' + video_id
	1130	url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
	1131	webpage = self._download_webpage(
	1132	url, playlist_id, 'Downloading Youtube mix')
	1133	search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
	1134	title_span = (
	1135	search_title('playlist-title') or
	1136	search_title('title long-title') or
	1137	search_title('title'))
	1138	title = clean_html(title_span)
	1139	ids = orderedSet(re.findall(
	1140	r'''(?xs)data-video-username=".?".?
	1141	href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
	1142	webpage))
	1143	url_results = self._ids_to_results(ids)
	1144
	1145	return self.playlist_result(url_results, playlist_id, title)
	1146
	1147	def _real_extract(self, url):
	1148	# Extract playlist id
	1149	mobj = re.match(self._VALID_URL, url)
	1150	if mobj is None:
	1151	raise ExtractorError('Invalid URL: %s' % url)
	1152	playlist_id = mobj.group(1) or mobj.group(2)
	1153
	1154	# Check if it's a video-specific URL
	1155	query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
	1156	if 'v' in query_dict:
	1157	video_id = query_dict['v'][0]
	1158	if self._downloader.params.get('noplaylist'):
	1159	self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
	1160	return self.url_result(video_id, 'Youtube', video_id=video_id)
	1161	else:
	1162	self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
	1163
	1164	if playlist_id.startswith('RD'):
	1165	# Mixes require a custom extraction process
	1166	return self._extract_mix(playlist_id)
	1167	if playlist_id.startswith('TL'):
	1168	raise ExtractorError('For downloading YouTube.com top lists, use '
	1169	'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
	1170
	1171	url = self._TEMPLATE_URL % playlist_id
	1172	page = self._download_webpage(url, playlist_id)
	1173	more_widget_html = content_html = page
	1174
	1175	# Check if the playlist exists or is private
	1176	if re.search(r'<div class="yt-alert-message">[^<]?(The\|This) playlist (does not exist\|is private)[^<]?</div>', page) is not None:
	1177	raise ExtractorError(
	1178	'The playlist doesn\'t exist or is private, use --username or '
	1179	'--netrc to access it.',
	1180	expected=True)
	1181
	1182	# Extract the video ids from the playlist pages
	1183	ids = []
	1184
	1185	for page_num in itertools.count(1):
	1186	matches = re.finditer(self._VIDEO_RE, content_html)
	1187	# We remove the duplicates and the link with index 0
	1188	# (it's not the first video of the playlist)
	1189	new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
	1190	ids.extend(new_ids)
	1191
	1192	mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
	1193	if not mobj:
	1194	break
	1195
	1196	more = self._download_json(
	1197	'https://youtube.com/%s' % mobj.group('more'), playlist_id,
	1198	'Downloading page #%s' % page_num,
	1199	transform_source=uppercase_escape)
	1200	content_html = more['content_html']
	1201	more_widget_html = more['load_more_widget_html']
	1202
	1203	playlist_title = self._html_search_regex(
	1204	r'(?s)<h1 class="pl-header-title[^"]">\s(.?)\s</h1>',
	1205	page, 'title')
	1206
	1207	url_results = self._ids_to_results(ids)
	1208	return self.playlist_result(url_results, playlist_id, playlist_title)
	1209
	1210
	1211	class YoutubeTopListIE(YoutubePlaylistIE):
	1212	IE_NAME = 'youtube:toplist'
	1213	IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
	1214	' (Example: "yttoplist:music:Top Tracks")')
	1215	_VALID_URL = r'yttoplist:(?P<chann>.?):(?P<title>.?)$'
	1216	_TESTS = [{
	1217	'url': 'yttoplist:music:Trending',
	1218	'playlist_mincount': 5,
	1219	'skip': 'Only works for logged-in users',
	1220	}]
	1221
	1222	def _real_extract(self, url):
	1223	mobj = re.match(self._VALID_URL, url)
	1224	channel = mobj.group('chann')
	1225	title = mobj.group('title')
	1226	query = compat_urllib_parse.urlencode({'title': title})
	1227	channel_page = self._download_webpage(
	1228	'https://www.youtube.com/%s' % channel, title)
	1229	link = self._html_search_regex(
	1230	r'''(?x)
	1231	<a\s+href="([^"]+)".?>\s
	1232	<span\s+class="branded-page-module-title-text">\s*
	1233	<span[^>]>.?%s.*?</span>''' % re.escape(query),
	1234	channel_page, 'list')
	1235	url = compat_urlparse.urljoin('https://www.youtube.com/', link)
	1236
	1237	video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
	1238	ids = []
	1239	# sometimes the webpage doesn't contain the videos
	1240	# retry until we get them
	1241	for i in itertools.count(0):
	1242	msg = 'Downloading Youtube mix'
	1243	if i > 0:
	1244	msg += ', retry #%d' % i
	1245
	1246	webpage = self._download_webpage(url, title, msg)
	1247	ids = orderedSet(re.findall(video_re, webpage))
	1248	if ids:
	1249	break
	1250	url_results = self._ids_to_results(ids)
	1251	return self.playlist_result(url_results, playlist_title=title)
	1252
	1253
	1254	class YoutubeChannelIE(InfoExtractor):
	1255	IE_DESC = 'YouTube.com channels'
	1256	_VALID_URL = r'https?://(?:youtu\.be\|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
	1257	_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
	1258	_MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
	1259	IE_NAME = 'youtube:channel'
	1260	_TESTS = [{
	1261	'note': 'paginated channel',
	1262	'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
	1263	'playlist_mincount': 91,
	1264	}]
	1265
	1266	def extract_videos_from_page(self, page):
	1267	ids_in_page = []
	1268	for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
	1269	if mobj.group(1) not in ids_in_page:
	1270	ids_in_page.append(mobj.group(1))
	1271	return ids_in_page
	1272
	1273	def _real_extract(self, url):
	1274	channel_id = self._match_id(url)
	1275
	1276	video_ids = []
	1277	url = 'https://www.youtube.com/channel/%s/videos' % channel_id
	1278	channel_page = self._download_webpage(url, channel_id)
	1279	autogenerated = re.search(r'''(?x)
	1280	class="[^"]*?(?:
	1281	channel-header-autogenerated-label\|
	1282	yt-channel-title-autogenerated
	1283	)[^"]*"''', channel_page) is not None
	1284
	1285	if autogenerated:
	1286	# The videos are contained in a single page
	1287	# the ajax pages can't be used, they are empty
	1288	video_ids = self.extract_videos_from_page(channel_page)
	1289	entries = [
	1290	self.url_result(video_id, 'Youtube', video_id=video_id)
	1291	for video_id in video_ids]
	1292	return self.playlist_result(entries, channel_id)
	1293
	1294	def _entries():
	1295	for pagenum in itertools.count(1):
	1296	url = self._MORE_PAGES_URL % (pagenum, channel_id)
	1297	page = self._download_json(
	1298	url, channel_id, note='Downloading page #%s' % pagenum,
	1299	transform_source=uppercase_escape)
	1300
	1301	ids_in_page = self.extract_videos_from_page(page['content_html'])
	1302	for video_id in ids_in_page:
	1303	yield self.url_result(
	1304	video_id, 'Youtube', video_id=video_id)
	1305
	1306	if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
	1307	break
	1308
	1309	return self.playlist_result(_entries(), channel_id)
	1310
	1311
	1312	class YoutubeUserIE(InfoExtractor):
	1313	IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
	1314	_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link\|watch\|results)(?:$\|[^a-z_A-Z0-9-])))\|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
	1315	_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
	1316	_GDATA_PAGE_SIZE = 50
	1317	_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
	1318	IE_NAME = 'youtube:user'
	1319
	1320	_TESTS = [{
	1321	'url': 'https://www.youtube.com/user/TheLinuxFoundation',
	1322	'playlist_mincount': 320,
	1323	'info_dict': {
	1324	'title': 'TheLinuxFoundation',
	1325	}
	1326	}, {
	1327	'url': 'ytuser:phihag',
	1328	'only_matching': True,
	1329	}]
	1330
	1331	@classmethod
	1332	def suitable(cls, url):
	1333	# Don't return True if the url can be extracted with other youtube
	1334	# extractor, the regex would is too permissive and it would match.
	1335	other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
	1336	if any(ie.suitable(url) for ie in other_ies):
	1337	return False
	1338	else:
	1339	return super(YoutubeUserIE, cls).suitable(url)
	1340
	1341	def _real_extract(self, url):
	1342	username = self._match_id(url)
	1343
	1344	# Download video ids using YouTube Data API. Result size per
	1345	# query is limited (currently to 50 videos) so we need to query
	1346	# page by page until there are no video ids - it means we got
	1347	# all of them.
	1348
	1349	def download_page(pagenum):
	1350	start_index = pagenum * self._GDATA_PAGE_SIZE + 1
	1351
	1352	gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
	1353	page = self._download_webpage(
	1354	gdata_url, username,
	1355	'Downloading video ids from %d to %d' % (
	1356	start_index, start_index + self._GDATA_PAGE_SIZE))
	1357
	1358	try:
	1359	response = json.loads(page)
	1360	except ValueError as err:
	1361	raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
	1362	if 'entry' not in response['feed']:
	1363	return
	1364
	1365	# Extract video identifiers
	1366	entries = response['feed']['entry']
	1367	for entry in entries:
	1368	title = entry['title']['$t']
	1369	video_id = entry['id']['$t'].split('/')[-1]
	1370	yield {
	1371	'_type': 'url',
	1372	'url': video_id,
	1373	'ie_key': 'Youtube',
	1374	'id': video_id,
	1375	'title': title,
	1376	}
	1377	url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE)
	1378
	1379	return self.playlist_result(url_results, playlist_title=username)
	1380
	1381
	1382	class YoutubeSearchIE(SearchInfoExtractor):
	1383	IE_DESC = 'YouTube.com searches'
	1384	_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
	1385	_MAX_RESULTS = 1000
	1386	IE_NAME = 'youtube:search'
	1387	_SEARCH_KEY = 'ytsearch'
	1388
	1389	def _get_n_results(self, query, n):
	1390	"""Get a specified number of results for a query"""
	1391
	1392	video_ids = []
	1393	pagenum = 0
	1394	limit = n
	1395	PAGE_SIZE = 50
	1396
	1397	while (PAGE_SIZE * pagenum) < limit:
	1398	result_url = self._API_URL % (
	1399	compat_urllib_parse.quote_plus(query.encode('utf-8')),
	1400	(PAGE_SIZE * pagenum) + 1)
	1401	data_json = self._download_webpage(
	1402	result_url, video_id='query "%s"' % query,
	1403	note='Downloading page %s' % (pagenum + 1),
	1404	errnote='Unable to download API page')
	1405	data = json.loads(data_json)
	1406	api_response = data['data']
	1407
	1408	if 'items' not in api_response:
	1409	raise ExtractorError(
	1410	'[youtube] No video results', expected=True)
	1411
	1412	new_ids = list(video['id'] for video in api_response['items'])
	1413	video_ids += new_ids
	1414
	1415	limit = min(n, api_response['totalItems'])
	1416	pagenum += 1
	1417
	1418	if len(video_ids) > n:
	1419	video_ids = video_ids[:n]
	1420	videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
	1421	for video_id in video_ids]
	1422	return self.playlist_result(videos, query)
	1423
	1424
	1425	class YoutubeSearchDateIE(YoutubeSearchIE):
	1426	IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
	1427	_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
	1428	_SEARCH_KEY = 'ytsearchdate'
	1429	IE_DESC = 'YouTube.com searches, newest videos first'
	1430
	1431
	1432	class YoutubeSearchURLIE(InfoExtractor):
	1433	IE_DESC = 'YouTube.com search URLs'
	1434	IE_NAME = 'youtube:search_url'
	1435	_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]\|$)'
	1436	_TESTS = [{
	1437	'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
	1438	'playlist_mincount': 5,
	1439	'info_dict': {
	1440	'title': 'youtube-dl test video',
	1441	}
	1442	}]
	1443
	1444	def _real_extract(self, url):
	1445	mobj = re.match(self._VALID_URL, url)
	1446	query = compat_urllib_parse.unquote_plus(mobj.group('query'))
	1447
	1448	webpage = self._download_webpage(url, query)
	1449	result_code = self._search_regex(
	1450	r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')
	1451
	1452	part_codes = re.findall(
	1453	r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
	1454	entries = []
	1455	for part_code in part_codes:
	1456	part_title = self._html_search_regex(
	1457	[r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
	1458	part_url_snippet = self._html_search_regex(
	1459	r'(?s)href="([^"]+)"', part_code, 'item URL')
	1460	part_url = compat_urlparse.urljoin(
	1461	'https://www.youtube.com/', part_url_snippet)
	1462	entries.append({
	1463	'_type': 'url',
	1464	'url': part_url,
	1465	'title': part_title,
	1466	})
	1467
	1468	return {
	1469	'_type': 'playlist',
	1470	'entries': entries,
	1471	'title': query,
	1472	}
	1473
	1474
	1475	class YoutubeShowIE(InfoExtractor):
	1476	IE_DESC = 'YouTube.com (multi-season) shows'
	1477	_VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
	1478	IE_NAME = 'youtube:show'
	1479	_TESTS = [{
	1480	'url': 'http://www.youtube.com/show/airdisasters',
	1481	'playlist_mincount': 3,
	1482	'info_dict': {
	1483	'id': 'airdisasters',
	1484	'title': 'Air Disasters',
	1485	}
	1486	}]
	1487
	1488	def _real_extract(self, url):
	1489	mobj = re.match(self._VALID_URL, url)
	1490	playlist_id = mobj.group('id')
	1491	webpage = self._download_webpage(
	1492	url, playlist_id, 'Downloading show webpage')
	1493	# There's one playlist for each season of the show
	1494	m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
	1495	self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
	1496	entries = [
	1497	self.url_result(
	1498	'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
	1499	for season in m_seasons
	1500	]
	1501	title = self._og_search_title(webpage, fatal=False)
	1502
	1503	return {
	1504	'_type': 'playlist',
	1505	'id': playlist_id,
	1506	'title': title,
	1507	'entries': entries,
	1508	}
	1509
	1510
	1511	class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
	1512	"""
	1513	Base class for extractors that fetch info from
	1514	http://www.youtube.com/feed_ajax
	1515	Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
	1516	"""
	1517	_LOGIN_REQUIRED = True
	1518	# use action_load_personal_feed instead of action_load_system_feed
	1519	_PERSONAL_FEED = False
	1520
	1521	@property
	1522	def _FEED_TEMPLATE(self):
	1523	action = 'action_load_system_feed'
	1524	if self._PERSONAL_FEED:
	1525	action = 'action_load_personal_feed'
	1526	return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)
	1527
	1528	@property
	1529	def IE_NAME(self):
	1530	return 'youtube:%s' % self._FEED_NAME
	1531
	1532	def _real_initialize(self):
	1533	self._login()
	1534
	1535	def _real_extract(self, url):
	1536	feed_entries = []
	1537	paging = 0
	1538	for i in itertools.count(1):
	1539	info = self._download_json(self._FEED_TEMPLATE % paging,
	1540	'%s feed' % self._FEED_NAME,
	1541	'Downloading page %s' % i)
	1542	feed_html = info.get('feed_html') or info.get('content_html')
	1543	load_more_widget_html = info.get('load_more_widget_html') or feed_html
	1544	m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
	1545	ids = orderedSet(m.group(1) for m in m_ids)
	1546	feed_entries.extend(
	1547	self.url_result(video_id, 'Youtube', video_id=video_id)
	1548	for video_id in ids)
	1549	mobj = re.search(
	1550	r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
	1551	load_more_widget_html)
	1552	if mobj is None:
	1553	break
	1554	paging = mobj.group('paging')
	1555	return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
	1556
	1557
	1558	class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
	1559	IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
	1560	_VALID_URL = r'https?://www\.youtube\.com/feed/recommended\|:ytrec(?:ommended)?'
	1561	_FEED_NAME = 'recommended'
	1562	_PLAYLIST_TITLE = 'Youtube Recommended videos'
	1563
	1564
	1565	class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
	1566	IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
	1567	_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later\|:ytwatchlater'
	1568	_FEED_NAME = 'watch_later'
	1569	_PLAYLIST_TITLE = 'Youtube Watch Later'
	1570	_PERSONAL_FEED = True
	1571
	1572
	1573	class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
	1574	IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
	1575	_VALID_URL = 'https?://www\.youtube\.com/feed/history\|:ythistory'
	1576	_FEED_NAME = 'history'
	1577	_PERSONAL_FEED = True
	1578	_PLAYLIST_TITLE = 'Youtube Watch History'
	1579
	1580
	1581	class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
	1582	IE_NAME = 'youtube:favorites'
	1583	IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
	1584	_VALID_URL = r'https?://www\.youtube\.com/my_favorites\|:ytfav(?:ou?rites)?'
	1585	_LOGIN_REQUIRED = True
	1586
	1587	def _real_extract(self, url):
	1588	webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
	1589	playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
	1590	return self.url_result(playlist_id, 'YoutubePlaylist')
	1591
	1592
	1593	class YoutubeSubscriptionsIE(YoutubePlaylistIE):
	1594	IE_NAME = 'youtube:subscriptions'
	1595	IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
	1596	_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions\|:ytsubs(?:criptions)?'
	1597	_TESTS = []
	1598
	1599	def _real_extract(self, url):
	1600	title = 'Youtube Subscriptions'
	1601	page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title)
	1602
	1603	# The extraction process is the same as for playlists, but the regex
	1604	# for the video ids doesn't contain an index
	1605	ids = []
	1606	more_widget_html = content_html = page
	1607
	1608	for page_num in itertools.count(1):
	1609	matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
	1610	new_ids = orderedSet(matches)
	1611	ids.extend(new_ids)
	1612
	1613	mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
	1614	if not mobj:
	1615	break
	1616
	1617	more = self._download_json(
	1618	'https://youtube.com/%s' % mobj.group('more'), title,
	1619	'Downloading page #%s' % page_num,
	1620	transform_source=uppercase_escape)
	1621	content_html = more['content_html']
	1622	more_widget_html = more['load_more_widget_html']
	1623
	1624	return {
	1625	'_type': 'playlist',
	1626	'title': title,
	1627	'entries': self._ids_to_results(ids),
	1628	}
	1629
	1630
	1631	class YoutubeTruncatedURLIE(InfoExtractor):
	1632	IE_NAME = 'youtube:truncated_url'
	1633	IE_DESC = False # Do not list
	1634	_VALID_URL = r'''(?x)
	1635	(?:https?://)?[^/]+/watch\?(?:
	1636	feature=[a-z_]+\|
	1637	annotation_id=annotation_[^&]+
	1638	)?$\|
	1639	(?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
	1640	'''
	1641
	1642	_TESTS = [{
	1643	'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
	1644	'only_matching': True,
	1645	}, {
	1646	'url': 'http://www.youtube.com/watch?',
	1647	'only_matching': True,
	1648	}]
	1649
	1650	def _real_extract(self, url):
	1651	raise ExtractorError(
	1652	'Did you forget to quote the URL? Remember that & is a meta '
	1653	'character in most shells, so you want to put the URL in quotes, '
	1654	'like youtube-dl '
	1655	'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
	1656	' or simply youtube-dl BaW_jenozKc .',
	1657	expected=True)