]> jfr.im git - yt-dlp.git/blame_incremental - youtube_dl/extractor/youtube.py
[/__init__] Add another cute search example
[yt-dlp.git] / youtube_dl / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5
6import itertools
7import json
8import os.path
9import re
10import time
11import traceback
12
13from .common import InfoExtractor, SearchInfoExtractor
14from .subtitles import SubtitlesInfoExtractor
15from ..jsinterp import JSInterpreter
16from ..swfinterp import SWFInterpreter
17from ..compat import (
18 compat_chr,
19 compat_parse_qs,
20 compat_urllib_parse,
21 compat_urllib_request,
22 compat_urlparse,
23 compat_str,
24)
25from ..utils import (
26 clean_html,
27 ExtractorError,
28 get_element_by_attribute,
29 get_element_by_id,
30 int_or_none,
31 OnDemandPagedList,
32 orderedSet,
33 unescapeHTML,
34 unified_strdate,
35 uppercase_escape,
36)
37
38
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        # Force the English interface (hl=en) via the PREF cookie so that
        # scraped strings are predictable.
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Keep the documented True/False contract (was a bare `return`)
            return False

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, 'Login GALX parameter')

        # Log in
        login_form_strs = {
            'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            'Email': username,
            'GALX': galx,
            'Passwd': password,

            'PersistentCookie': 'yes',
            '_utf8': '霱',
            'bgresponse': 'js_disabled',
            'checkConnection': '',
            'checkedDomains': 'youtube',
            'dnConn': '',
            'pstMsg': '0',
            'rmShown': '1',
            'secTok': '',
            'signIn': 'Sign in',
            'timeStmp': '',
            'service': 'youtube',
            'uilel': '3',
            'hl': 'en_US',
        }

        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')

        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note='Logging in', errnote='unable to log in', fatal=False)
        if login_results is False:
            return False

        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

        # Two-Factor
        # TODO add SMS and phone call support - these require making a request and then prompting the user

        if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
            tfa_code = self._get_tfa_info()

            if tfa_code is None:
                self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
                self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            # Unlike the first login form, secTok and timeStmp are both required for the TFA form

            match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
            if match is None:
                # Bail out: continuing would crash with AttributeError on
                # match.group(1) below
                self._downloader.report_warning('Failed to get secTok - did the page structure change?')
                return False
            secTok = match.group(1)
            match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
            if match is None:
                # Same as above: warn and fail instead of crashing
                self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
                return False
            timeStmp = match.group(1)

            tfa_form_strs = {
                'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                'smsToken': '',
                'smsUserPin': tfa_code,
                'smsVerifyPin': 'Verify',

                'PersistentCookie': 'yes',
                'checkConnection': '',
                'checkedDomains': 'youtube',
                'pstMsg': '1',
                'secTok': secTok,
                'timeStmp': timeStmp,
                'service': 'youtube',
                'hl': 'en_US',
            }
            tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
            tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')

            tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                tfa_req, None,
                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)

            if tfa_results is False:
                return False

            if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
                self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
                return False
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
                self._downloader.report_warning('unable to log in - did the page structure change?')
                return False
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                return False

        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_initialize(self):
        # Called once before extraction: set cookies and (optionally) log in.
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
185
186
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    """Extractor for individual YouTube videos (watch pages, embeds, short
    youtu.be links and naked 11-character IDs)."""
    IE_DESC = 'YouTube.com'
    # Verbose regex; group 1 is the (optional) URL prefix, group 2 the video ID
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                            (?:www\.)?deturl\.com/www\.youtube\.com/|
                            (?:www\.)?pwnyoutube\.com/|
                            (?:www\.)?yourepeat\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?!.*?&list=)                                            # combined list/video URLs are handled by the playlist IE
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Static per-itag format metadata, merged into the formats discovered at
    # extraction time.  Negative 'preference' values rank special-purpose
    # variants (3D, HLS, DASH) below the plain progressive formats.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240},
        '6': {'ext': 'flv', 'width': 450, 'height': 270},
        '13': {'ext': '3gp'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
        '34': {'ext': 'flv', 'width': 640, 'height': 360},
        '35': {'ext': 'flv', 'width': 854, 'height': 480},
        '36': {'ext': '3gp', 'width': 320, 'height': 240},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
        '43': {'ext': 'webm', 'width': 640, 'height': 360},
        '44': {'ext': 'webm', 'width': 854, 'height': 480},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080},


        # 3d videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},

        # Apple HTTP Live Streaming
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'VP9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},

        # Dash webm audio
        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
        '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
        '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
        '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},
    }

    IE_NAME = 'youtube'
    _TESTS = [
        {
            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'upload_date': '20121002',
                'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'like_count': int,
                'dislike_count': int,
            }
        },
        {
            'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
            'note': 'Test generic use_cipher_signature video (#897)',
            'info_dict': {
                'id': 'UxxajLWwzqY',
                'ext': 'mp4',
                'upload_date': '20120506',
                'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
                'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
                'uploader': 'Icona Pop',
                'uploader_id': 'IconaPop',
            }
        },
        {
            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
            'note': 'Test VEVO video with age protection (#956)',
            'info_dict': {
                'id': '07FYdnEawAQ',
                'ext': 'mp4',
                'upload_date': '20130703',
                'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
                'description': 'md5:64249768eec3bc4276236606ea996373',
                'uploader': 'justintimberlakeVEVO',
                'uploader_id': 'justintimberlakeVEVO',
            }
        },
        {
            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
            'note': 'Embed-only video (#1746)',
            'info_dict': {
                'id': 'yZIXLfi8CZQ',
                'ext': 'mp4',
                'upload_date': '20120608',
                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
                'uploader': 'SET India',
                'uploader_id': 'setindia'
            }
        },
        {
            'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
            'note': '256k DASH audio (format 141) via DASH manifest',
            'info_dict': {
                'id': 'a9LDPn-MO4I',
                'ext': 'm4a',
                'upload_date': '20121002',
                'uploader_id': '8KVIDEO',
                'description': '',
                'uploader': '8KVIDEO',
                'title': 'UHDTV TEST 8K VIDEO.mp4'
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141',
            },
        },
        # DASH manifest with encrypted signature
        {
            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
            'info_dict': {
                'id': 'IB3lcPjvWLA',
                'ext': 'm4a',
                'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
                'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
                'uploader': 'AfrojackVEVO',
                'uploader_id': 'AfrojackVEVO',
                'upload_date': '20131011',
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141',
            },
        },
        # Controversy video
        {
            'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
            'info_dict': {
                'id': 'T4XJQO3qol8',
                'ext': 'mp4',
                'upload_date': '20100909',
                'uploader': 'The Amazing Atheist',
                'uploader_id': 'TheAmazingAtheist',
                'title': 'Burning Everyone\'s Koran',
                'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
            }
        },
        # Normal age-gate video (No vevo, embed allowed)
        {
            'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
            'info_dict': {
                'id': 'HtVdAasjOgU',
                'ext': 'mp4',
                'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
                'description': 'md5:eca57043abae25130f58f655ad9a7771',
                'uploader': 'The Witcher',
                'uploader_id': 'WitcherGame',
                'upload_date': '20140605',
            },
        },
        # Age-gate video with encrypted signature
        {
            'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
            'info_dict': {
                'id': '6kLq3WMV1nU',
                'ext': 'mp4',
                'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
                'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
                'uploader': 'LloydVEVO',
                'uploader_id': 'LloydVEVO',
                'upload_date': '20110629',
            },
        },
        # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
        {
            'url': '__2ABJjxzNo',
            'info_dict': {
                'id': '__2ABJjxzNo',
                'ext': 'mp4',
                'upload_date': '20100430',
                'uploader_id': 'deadmau5',
                'description': 'md5:12c56784b8032162bb936a5f76d55360',
                'uploader': 'deadmau5',
                'title': 'Deadmau5 - Some Chords (HD)',
            },
            'expected_warnings': [
                'DASH manifest missing',
            ]
        },
        # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
        {
            'url': 'lqQg6PlCWgI',
            'info_dict': {
                'id': 'lqQg6PlCWgI',
                'ext': 'mp4',
                'upload_date': '20120731',
                'uploader_id': 'olympic',
                'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
                'uploader': 'Olympics',
                'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
            },
            'params': {
                'skip_download': 'requires avconv',
            }
        },
    ]
467
    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # In-memory cache of signature-decryption functions, keyed by
        # (player_url, signature length spec); see _decrypt_signature.
        self._player_cache = {}
471
    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        # Thin wrapper so _real_extract reads cleanly
        self.to_screen('%s: Downloading video info webpage' % video_id)
475
    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self.to_screen('%s: Extracting video information' % video_id)
479
    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available."""
        self.to_screen('%s: Format %s not available' % (video_id, format))
483
    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen('RTMP download detected')
487
488 def _signature_cache_id(self, example_sig):
489 """ Return a string representation of a signature """
490 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
491
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Build (or load from cache) the signature-decryption function for
        the player at *player_url*.

        Returns a callable mapping an encrypted signature string to the
        decrypted one.  Raises ExtractorError if the player cannot be
        identified from its URL.
        """
        # The player URL encodes an id and a type (js or swf extension)
        id_m = re.match(
            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
            player_url)
        if not id_m:
            raise ExtractorError('Cannot identify player %r' % player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id doubles as a cache filename; must not contain path parts
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # The cached spec is a list of input-character indices: the
            # signature function is a pure reordering/selection of characters
            return lambda s: ''.join(s[i] for i in cache_spec)

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note='Downloading %s player %s' % (player_type, player_id),
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note='Downloading %s player %s' % (player_type, player_id),
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Derive the index spec by running the freshly-parsed function on a
        # probe string of distinct characters, then persist it
        if cache_spec is None:
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
533
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the signature function *func*,
        reconstructed by probing it with a string of distinct characters
        (used for --youtube-print-sig-code style debugging)."""
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a run of indices with constant stride as a slice
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # We are inside a +1/-1 run: extend it or close it out
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a new run with stride +1 or -1
                    step = i - prev
                    start = prev
                    continue
                else:
                    # Isolated index
                    yield 's[%d]' % prev
            # Flush the final element or the final open run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Each output character's code point reveals which input index it
        # came from, yielding the permutation spec
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
572
573 def _parse_sig_js(self, jscode):
574 funcname = self._search_regex(
575 r'\.sig\|\|([a-zA-Z0-9]+)\(', jscode,
576 'Initial JS player signature function name')
577
578 jsi = JSInterpreter(jscode)
579 initial_function = jsi.extract_function(funcname)
580 return lambda s: initial_function([s])
581
582 def _parse_sig_swf(self, file_contents):
583 swfi = SWFInterpreter(file_contents)
584 TARGET_CLASSNAME = 'SignatureDecipher'
585 searched_class = swfi.extract_class(TARGET_CLASSNAME)
586 initial_function = swfi.extract_function(searched_class, 'decipher')
587 return lambda s: initial_function([s])
588
589 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
590 """Turn the encrypted s field into a working signature"""
591
592 if player_url is None:
593 raise ExtractorError('Cannot decrypt signature without player_url')
594
595 if player_url.startswith('//'):
596 player_url = 'https:' + player_url
597 try:
598 player_id = (player_url, self._signature_cache_id(s))
599 if player_id not in self._player_cache:
600 func = self._extract_signature_function(
601 video_id, player_url, s
602 )
603 self._player_cache[player_id] = func
604 func = self._player_cache[player_id]
605 if self._downloader.params.get('youtube_print_sig_code'):
606 self._print_sig_code(func, s)
607 return func(s)
608 except Exception as e:
609 tb = traceback.format_exc()
610 raise ExtractorError(
611 'Signature extraction failed: ' + tb, cause=e)
612
    def _get_available_subtitles(self, video_id, webpage):
        """Return a dict mapping language codes to subtitle URLs, or {} if
        none are available.  *webpage* is unused here but kept for interface
        compatibility with the subtitles base class."""
        try:
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
            return {}

        sub_lang_list = {}
        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            # Keep only the first track for each language
            if lang in sub_lang_list:
                continue
            params = compat_urllib_parse.urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
                'name': track.attrib['name'].encode('utf-8'),
            })
            url = 'https://www.youtube.com/api/timedtext?' + params
            sub_lang_list[lang] = url
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
            return {}
        return sub_lang_list
639
    def _get_available_automatic_caption(self, video_id, webpage):
        """Return a dict mapping language codes to automatic-caption URLs.

        We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.
        """
        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
        self.to_screen('%s: Looking for automatic captions' % video_id)
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if mobj is None:
            self._downloader.report_warning(err_msg)
            return {}
        player_config = json.loads(mobj.group(1))
        try:
            args = player_config['args']
            caption_url = args['ttsurl']
            timestamp = args['timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse.urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            caption_list = self._download_xml(list_url, video_id)
            # The first <track> is the video's original language
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')

            # Each <target> is a language the captions can be translated to
            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                params = compat_urllib_parse.urlencode({
                    'lang': original_lang,
                    'tlang': sub_lang,
                    'fmt': sub_format,
                    'ts': timestamp,
                    'kind': caption_kind,
                })
                sub_lang_list[sub_lang] = caption_url + '&' + params
            return sub_lang_list
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
687
688 @classmethod
689 def extract_id(cls, url):
690 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
691 if mobj is None:
692 raise ExtractorError('Invalid URL: %s' % url)
693 video_id = mobj.group(2)
694 return video_id
695
696 def _extract_from_m3u8(self, manifest_url, video_id):
697 url_map = {}
698
699 def _get_urls(_manifest):
700 lines = _manifest.split('\n')
701 urls = filter(lambda l: l and not l.startswith('#'),
702 lines)
703 return urls
704 manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
705 formats_urls = _get_urls(manifest)
706 for format_url in formats_urls:
707 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
708 url_map[itag] = format_url
709 return url_map
710
    def _extract_annotations(self, video_id):
        """Download and return the raw annotations document for *video_id*."""
        url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
        return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
714
    def _parse_dash_manifest(
            self, video_id, dash_manifest_url, player_url, age_gate):
        """Download and parse a DASH manifest; return a list of format dicts.

        Encrypted '/s/<sig>' components in the manifest URL are decrypted
        in place before downloading.
        """
        def decrypt_sig(mobj):
            # re.sub callback: rewrite '/s/<enc>' as '/signature/<dec>'
            s = mobj.group(1)
            dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
            return '/signature/%s' % dec_s
        dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
        dash_doc = self._download_xml(
            dash_manifest_url, video_id,
            note='Downloading DASH manifest',
            errnote='Could not download DASH manifest')

        formats = []
        for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
            url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
            if url_el is None:
                continue
            format_id = r.attrib['id']
            video_url = url_el.text
            filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
            f = {
                'format_id': format_id,
                'url': video_url,
                'width': int_or_none(r.attrib.get('width')),
                'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
                'asr': int_or_none(r.attrib.get('audioSamplingRate')),
                'filesize': filesize,
                'fps': int_or_none(r.attrib.get('frameRate')),
            }
            try:
                # Merge into an already-seen representation with the same id;
                # otherwise register a new format enriched with the static
                # metadata from self._formats
                existing_format = next(
                    fo for fo in formats
                    if fo['format_id'] == format_id)
            except StopIteration:
                f.update(self._formats.get(format_id, {}))
                formats.append(f)
            else:
                existing_format.update(f)
        return formats
754
    def _real_extract(self, url):
        """Extract metadata and downloadable formats for a single video.

        Downloads the watch page (or, for age-gated videos, the embed page
        plus the get_video_info endpoint), decrypts stream signatures where
        needed, and returns a standard info dict including all formats.
        """
        # Scheme honours --prefer-insecure; reused for every URL built below.
        proto = (
            'http' if self._downloader.params.get('prefer_insecure', False)
            else 'https')

        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
            url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self.extract_id(url)

        # Get video webpage
        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
        video_webpage = self._download_webpage(url, video_id)

        # Attempt to extract SWF player URL
        # (the URL appears backslash-escaped inside swfConfig, hence the unescaping below)
        mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        if mobj is not None:
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
        else:
            player_url = None

        # Get video info
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            age_gate = True
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            url = proto + '://www.youtube.com/embed/%s' % video_id
            embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
            data = compat_urllib_parse.urlencode({
                'video_id': video_id,
                'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                # 'sts' is a timestamp-like token from the embed page;
                # passed through verbatim (empty string when absent)
                'sts': self._search_regex(
                    r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
            })
            video_info_url = proto + '://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(
                video_info_url, video_id,
                note='Refetching age-gated info webpage',
                errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
        else:
            age_gate = False
            try:
                # Try looking directly into the video webpage
                mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
                if not mobj:
                    raise ValueError('Could not find ytplayer.config')  # caught below
                json_code = uppercase_escape(mobj.group(1))
                ytplayer_config = json.loads(json_code)
                args = ytplayer_config['args']
                # Convert to the same format returned by compat_parse_qs
                video_info = dict((k, [v]) for k, v in args.items())
                if 'url_encoded_fmt_stream_map' not in args:
                    raise ValueError('No stream_map present')  # caught below
            except ValueError:
                # We fallback to the get_video_info pages (used by the embed page)
                # Each 'el' variant may succeed where the others fail.
                self.report_video_info_webpage_download(video_id)
                for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                    video_info_url = (
                        '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                        % (proto, video_id, el_type))
                    video_info_webpage = self._download_webpage(
                        video_info_url,
                        video_id, note=False,
                        errnote='unable to download video info webpage')
                    video_info = compat_parse_qs(video_info_webpage)
                    # 'token' presence marks a usable response
                    if 'token' in video_info:
                        break
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(
                    'YouTube said: %s' % video_info['reason'][0],
                    expected=True, video_id=video_id)
            else:
                raise ExtractorError(
                    '"token" parameter not in video info for unknown reason',
                    video_id=video_id)

        if 'view_count' in video_info:
            view_count = int(video_info['view_count'][0])
        else:
            view_count = None

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError('"rental" videos not supported')

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            raise ExtractorError('Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

        # uploader_id
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
        if mobj is not None:
            video_uploader_id = mobj.group(1)
        else:
            self._downloader.report_warning('unable to extract uploader nickname')

        # title
        if 'title' in video_info:
            video_title = video_info['title'][0]
        else:
            self._downloader.report_warning('Unable to extract video title')
            video_title = '_'

        # thumbnail image
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning('unable to extract video thumbnail')
            video_thumbnail = None
        else:   # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

        # upload date
        upload_date = None
        mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
        if mobj is None:
            # fallback markup: "Published/Uploaded/Streamed live on <date>"
            mobj = re.search(
                r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
                video_webpage)
        if mobj is not None:
            # normalize separators and whitespace before parsing the date
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            upload_date = unified_strdate(upload_date)

        # category (single-element list, matching the info-dict convention)
        m_cat_container = self._search_regex(
            r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
            video_webpage, 'categories', default=None)
        if m_cat_container:
            category = self._html_search_regex(
                r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
                default=None)
            video_categories = None if category is None else [category]
        else:
            video_categories = None

        # description
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            # Replace YouTube redirect links with their displayed title text
            video_description = re.sub(r'''(?x)
                <a\s+
                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
                    title="([^"]+)"\s+
                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
                    class="yt-uix-redirect-link"\s*>
                [^<]+
                </a>
            ''', r'\1', video_description)
            video_description = clean_html(video_description)
        else:
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
            if fd_mobj:
                video_description = unescapeHTML(fd_mobj.group(1))
            else:
                video_description = ''

        def _extract_count(count_name):
            # Parse the thousands-separated counter next to the like/dislike
            # buttons; None when the element is missing.
            count = self._search_regex(
                r'id="watch-%s"[^>]*>.*?([\d,]+)\s*</span>' % re.escape(count_name),
                video_webpage, count_name, default=None)
            if count is not None:
                return int(count.replace(',', ''))
            return None
        like_count = _extract_count('like')
        dislike_count = _extract_count('dislike')

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, video_webpage)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, video_webpage)
            return

        if 'length_seconds' not in video_info:
            self._downloader.report_warning('unable to extract video duration')
            video_duration = None
        else:
            video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))

        # annotations
        video_annotations = None
        if self._downloader.params.get('writeannotations', False):
            video_annotations = self._extract_annotations(video_id)

        def _map_to_format_list(urlmap):
            # Build format dicts from {itag: url}, merging static metadata
            # from self._formats when the itag is known.
            formats = []
            for itag, video_real_url in urlmap.items():
                dct = {
                    'format_id': itag,
                    'url': video_real_url,
                    'player_url': player_url,
                }
                if itag in self._formats:
                    dct.update(self._formats[itag])
                formats.append(dct)
            return formats

        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            formats = [{
                'format_id': '_rtmp',
                'protocol': 'rtmp',
                'url': video_info['conn'][0],
                'player_url': player_url,
            }]
        elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
            # Both maps are comma-separated lists of querystring-encoded streams
            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
            if 'rtmpe%3Dyes' in encoded_url_map:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            url_map = {}
            for url_data_str in encoded_url_map.split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' not in url_data or 'url' not in url_data:
                    continue
                format_id = url_data['itag'][0]
                url = url_data['url'][0]

                if 'sig' in url_data:
                    # Signature already in plain form
                    url += '&signature=' + url_data['sig'][0]
                elif 's' in url_data:
                    # Encrypted signature: needs the JS/SWF player to decrypt
                    encrypted_sig = url_data['s'][0]

                    jsplayer_url_json = self._search_regex(
                        r'"assets":.+?"js":\s*("[^"]+")',
                        embed_webpage if age_gate else video_webpage, 'JS player URL')
                    player_url = json.loads(jsplayer_url_json)
                    if player_url is None:
                        player_url_json = self._search_regex(
                            r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
                            video_webpage, 'age gate player URL')
                        player_url = json.loads(player_url_json)

                    if self._downloader.params.get('verbose'):
                        # Report which player (flash/html5, which version)
                        # produced the signature, to help debug decryption
                        if player_url is None:
                            player_version = 'unknown'
                            player_desc = 'unknown'
                        else:
                            if player_url.endswith('swf'):
                                player_version = self._search_regex(
                                    r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
                                    'flash player', fatal=False)
                                player_desc = 'flash player %s' % player_version
                            else:
                                player_version = self._search_regex(
                                    r'html5player-([^/]+?)(?:/html5player)?\.js',
                                    player_url,
                                    'html5 player', fatal=False)
                                player_desc = 'html5 player %s' % player_version

                        parts_sizes = self._signature_cache_id(encrypted_sig)
                        self.to_screen('{%s} signature length %s, %s' %
                                       (format_id, parts_sizes, player_desc))

                    signature = self._decrypt_signature(
                        encrypted_sig, video_id, player_url, age_gate)
                    url += '&signature=' + signature
                if 'ratebypass' not in url:
                    url += '&ratebypass=yes'
                url_map[format_id] = url
            formats = _map_to_format_list(url_map)
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            formats = _map_to_format_list(url_map)
        else:
            raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

        # Look for the DASH manifest
        if self._downloader.params.get('youtube_include_dash_manifest', True):
            dash_mpd = video_info.get('dashmpd')
            if dash_mpd:
                dash_manifest_url = dash_mpd[0]
                try:
                    dash_formats = self._parse_dash_manifest(
                        video_id, dash_manifest_url, player_url, age_gate)
                except (ExtractorError, KeyError) as e:
                    # A broken manifest should not kill the whole extraction
                    self.report_warning(
                        'Skipping DASH manifest: %r' % e, video_id)
                else:
                    formats.extend(dash_formats)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'uploader': video_uploader,
            'uploader_id': video_uploader_id,
            'upload_date': upload_date,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'categories': video_categories,
            'subtitles': video_subtitles,
            'duration': video_duration,
            'age_limit': 18 if age_gate else 0,
            'annotations': video_annotations,
            'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'formats': formats,
        }
1066
1067
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    """Extractor for YouTube playlists, including uploads (UU...), mixes
    (RD...) and other list types matched by _VALID_URL."""
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
        },
        'playlist_count': 2,
    }, {
        'note': 'embedded',
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
        },
        # Fixed typo: was 'playlist_mincout', which the test runner silently
        # ignored, so the assertion never ran.
        'playlist_mincount': 21,
    }]

    def _real_initialize(self):
        # Private playlists need the account cookies.
        self._login()

    def _ids_to_results(self, ids):
        """Wrap raw video ids into url_result dicts for the Youtube IE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _extract_mix(self, playlist_id):
        """Extract a YouTube mix playlist.

        The mixes are generated from a single video; the id of the playlist
        is just 'RD' + video_id, so the entries must be scraped from the
        watch page rather than a playlist page.
        """
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        # The title markup has varied over time; try the known variants.
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)
        ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
            webpage))
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _real_extract(self, url):
        """Return a playlist result with every video id found on the
        (possibly paginated) playlist pages."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)
        if playlist_id.startswith('TL'):
            raise ExtractorError('For downloading YouTube.com top lists, use '
                                 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)

        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)
        more_widget_html = content_html = page

        # Check if the playlist exists or is private
        if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None:
            raise ExtractorError(
                'The playlist doesn\'t exist or is private, use --username or '
                '--netrc to access it.',
                expected=True)

        # Extract the video ids from the playlist pages, following the
        # "Load more" AJAX widget until it disappears or returns no content.
        ids = []

        for page_num in itertools.count(1):
            matches = re.finditer(self._VIDEO_RE, content_html)
            # We remove the duplicates and the link with index 0
            # (it's not the first video of the playlist)
            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            if not content_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                break
            more_widget_html = more['load_more_widget_html']

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
            page, 'title')

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)
1248
1249
class YoutubeTopListIE(YoutubePlaylistIE):
    """Extractor for the 'yttoplist:{channel}:{list title}' pseudo-URLs."""
    IE_NAME = 'youtube:toplist'
    IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
               ' (Example: "yttoplist:music:Top Tracks")')
    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
    _TESTS = [{
        'url': 'yttoplist:music:Trending',
        'playlist_mincount': 5,
        'skip': 'Only works for logged-in users',
    }]

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        channel = match.group('chann')
        title = match.group('title')
        # The list link on the channel page carries the title as a query
        # parameter, so search for its encoded form.
        query = compat_urllib_parse.urlencode({'title': title})
        channel_page = self._download_webpage(
            'https://www.youtube.com/%s' % channel, title)
        link = self._html_search_regex(
            r'''(?x)
                <a\s+href="([^"]+)".*?>\s*
                <span\s+class="branded-page-module-title-text">\s*
                <span[^>]*>.*?%s.*?</span>''' % re.escape(query),
            channel_page, 'list')
        url = compat_urlparse.urljoin('https://www.youtube.com/', link)

        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
        # sometimes the webpage doesn't contain the videos
        # retry until we get them
        ids = []
        for attempt in itertools.count(0):
            note = 'Downloading Youtube mix'
            if attempt > 0:
                note += ', retry #%d' % attempt

            page = self._download_webpage(url, title, note)
            ids = orderedSet(re.findall(video_re, page))
            if ids:
                break
        entries = self._ids_to_results(ids)
        return self.playlist_result(entries, playlist_title=title)
1291
1292
class YoutubeChannelIE(InfoExtractor):
    """Extractor for all videos of a YouTube channel."""
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
    }]

    def extract_videos_from_page(self, page):
        """Return the unique video ids linked from *page*, first-seen order."""
        return orderedSet(
            match.group(1)
            for match in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page))

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(url, channel_id)
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = [
                self.url_result(vid, 'Youtube', video_id=vid)
                for vid in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        def _entries():
            # Lazily walk the "Load more" AJAX pages.
            more_widget_html = content_html = channel_page
            for pagenum in itertools.count(1):

                for vid in self.extract_videos_from_page(content_html):
                    yield self.url_result(vid, 'Youtube', video_id=vid)

                load_more = re.search(
                    r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
                    more_widget_html)
                if not load_more:
                    break

                more = self._download_json(
                    'https://youtube.com/%s' % load_more.group('more'), channel_id,
                    'Downloading page #%s' % (pagenum + 1),
                    transform_source=uppercase_escape)
                content_html = more['content_html']
                more_widget_html = more['load_more_widget_html']

        return self.playlist_result(_entries(), channel_id)
1354
1355
class YoutubeUserIE(InfoExtractor):
    """Extractor for all uploads of a YouTube user, via the GData API."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
    # Maximum page size allowed by the GData uploads feed
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractors; the regex is too permissive and it would match.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        else:
            return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        username = self._match_id(url)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        def download_page(pagenum):
            # Generator yielding url_result dicts for one GData page;
            # consumed lazily by OnDemandPagedList below.
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(
                gdata_url, username,
                'Downloading video ids from %d to %d' % (
                    start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Empty page: past the last upload
                return

            # Extract video identifiers
            entries = response['feed']['entry']
            for entry in entries:
                title = entry['title']['$t']
                video_id = entry['id']['$t'].split('/')[-1]
                yield {
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    'id': video_id,
                    'title': title,
                }
        url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE)

        return self.playlist_result(url_results, playlist_title=username)
1424
1425
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor backed by the GData video search API."""
    IE_DESC = 'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        PAGE_SIZE = 50
        video_ids = []
        limit = n
        pagenum = 0

        while (PAGE_SIZE * pagenum) < limit:
            result_url = self._API_URL % (
                compat_urllib_parse.quote_plus(query.encode('utf-8')),
                (PAGE_SIZE * pagenum) + 1)
            data_json = self._download_webpage(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % (pagenum + 1),
                errnote='Unable to download API page')
            api_response = json.loads(data_json)['data']

            if 'items' not in api_response:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            video_ids.extend(video['id'] for video in api_response['items'])

            # The API reports the true total; never ask for more than that.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        # Trim any overshoot from the last page.
        del video_ids[n:]
        videos = [
            self.url_result(vid, 'Youtube', video_id=vid)
            for vid in video_ids]
        return self.playlist_result(videos, query)
1467
1468
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same GData search as YoutubeSearchIE, but the API URL adds
    # orderby=published so the newest uploads come first.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
1474
1475
class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for YouTube search-result page URLs (scrapes the HTML)."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }]

    def _real_extract(self, url):
        query = compat_urllib_parse.unquote_plus(
            re.match(self._VALID_URL, url).group('query'))

        webpage = self._download_webpage(url, query)
        result_code = self._search_regex(
            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')

        entries = []
        # Each search hit sits inside its own lockup-title header.
        for snippet in re.findall(
                r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code):
            item_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], snippet, 'item title', fatal=False)
            href = self._html_search_regex(
                r'(?s)href="([^"]+)"', snippet, 'item URL')
            entries.append({
                '_type': 'url',
                'url': compat_urlparse.urljoin('https://www.youtube.com/', href),
                'title': item_title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
1517
1518
class YoutubeShowIE(InfoExtractor):
    """Extractor for YouTube shows: one playlist entry per season."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'http://www.youtube.com/show/airdisasters',
        'playlist_mincount': 3,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        playlist_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading show webpage')
        # There's one playlist for each season of the show
        season_links = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen('%s: Found %s seasons' % (playlist_id, len(season_links)))
        entries = [
            self.url_result(
                'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
            for season in season_links
        ]

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': self._og_search_title(webpage, fatal=False),
            'entries': entries,
        }
1553
1554
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are account-specific, so authentication is mandatory.
    _LOGIN_REQUIRED = True
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # URL template for the feed AJAX endpoint; the escaped '%%s' leaves
        # a '%s' slot for the paging token filled in by _real_extract.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Walk the paginated feed and return all linked videos as a playlist."""
        feed_entries = []
        paging = 0
        for i in itertools.count(1):
            info = self._download_json(
                self._FEED_TEMPLATE % paging,
                '%s feed' % self._FEED_NAME,
                'Downloading page %s' % i,
                transform_source=uppercase_escape)
            # The JSON key differs between feed variants; try both.
            feed_html = info.get('feed_html') or info.get('content_html')
            load_more_widget_html = info.get('load_more_widget_html') or feed_html
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(
                self.url_result(video_id, 'Youtube', video_id=video_id)
                for video_id in ids)
            # The next paging token lives in the "Load more" widget; absence
            # of the token means we reached the last page.
            mobj = re.search(
                r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
                load_more_widget_html)
            if mobj is None:
                break
            paging = mobj.group('paging')
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1602
1603
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Personalized "recommended" feed; all paging logic lives in the base class.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
1609
1610
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    # "Watch later" list; uses the personal-feed AJAX action (_PERSONAL_FEED).
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = 'Youtube Watch Later'
    _PERSONAL_FEED = True
1617
1618
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Watch-history feed; uses the personal-feed AJAX action (_PERSONAL_FEED).
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw-string prefix added: the pattern contains regex escapes like \.
    # which are invalid string-literal escapes in a plain string (deprecated
    # on newer Pythons) — the string value itself is unchanged, and this now
    # matches the convention used by every sibling class.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = 'Youtube Watch History'
1625
1626
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourites list."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites feed is just an ordinary playlist: find its id on
        # the page and delegate to the playlist extractor.
        favourites_page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist = self._search_regex(r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(playlist, 'YoutubePlaylist')
1637
1638
class YoutubeSubscriptionsIE(YoutubePlaylistIE):
    """Extractor for the logged-in user's subscriptions feed."""
    IE_NAME = 'youtube:subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _TESTS = []

    def _real_extract(self, url):
        title = 'Youtube Subscriptions'
        page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        video_ids = []
        more_widget_html = content_html = page

        for page_idx in itertools.count(1):
            video_ids.extend(orderedSet(
                re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)))

            load_more = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not load_more:
                break

            more = self._download_json(
                'https://youtube.com/%s' % load_more.group('more'), title,
                'Downloading page #%s' % page_idx,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return {
            '_type': 'playlist',
            'title': title,
            'entries': self._ids_to_results(video_ids),
        }
1675
1676
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that lost their video id (typically
    # because the shell ate everything after an unquoted '&') and raises a
    # helpful error instead of a confusing "unsupported URL" one.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?[^/]+/watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+
        )?$|
        (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
    '''

    _TESTS = [{
        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/watch?',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always fails on purpose: the URL cannot contain a video id.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .',
            expected=True)