jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5
	6	import itertools
	7	import json
	8	import os.path
	9	import re
	10	import time
	11	import traceback
	12
	13	from .common import InfoExtractor, SearchInfoExtractor
	14	from .subtitles import SubtitlesInfoExtractor
	15	from ..jsinterp import JSInterpreter
	16	from ..swfinterp import SWFInterpreter
	17	from ..compat import (
	18	compat_chr,
	19	compat_parse_qs,
	20	compat_urllib_parse,
	21	compat_urllib_request,
	22	compat_urlparse,
	23	compat_str,
	24	)
	25	from ..utils import (
	26	clean_html,
	27	ExtractorError,
	28	get_element_by_attribute,
	29	get_element_by_id,
	30	int_or_none,
	31	OnDemandPagedList,
	32	orderedSet,
	33	unescapeHTML,
	34	unified_strdate,
	35	uppercase_escape,
	36	)
	37
	38
	39	class YoutubeBaseInfoExtractor(InfoExtractor):
	40	"""Provide base functions for Youtube extractors"""
	41	_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
	42	_TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
	43	_NETRC_MACHINE = 'youtube'
	44	# If True it will raise an error if no login info is provided
	45	_LOGIN_REQUIRED = False
	46
	47	def _set_language(self):
	48	self._set_cookie(
	49	'.youtube.com', 'PREF', 'f1=50000000&hl=en',
	50	# YouTube sets the expire time to about two months
	51	expire_time=time.time() + 2 * 30 * 24 * 3600)
	52
	53	def _login(self):
	54	"""
	55	Attempt to log in to YouTube.
	56	True is returned if successful or skipped.
	57	False is returned if login failed.
	58
	59	If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
	60	"""
	61	(username, password) = self._get_login_info()
	62	# No authentication to be performed
	63	if username is None:
	64	if self._LOGIN_REQUIRED:
	65	raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
	66	return True
	67
	68	login_page = self._download_webpage(
	69	self._LOGIN_URL, None,
	70	note='Downloading login page',
	71	errnote='unable to fetch login page', fatal=False)
	72	if login_page is False:
	73	return
	74
	75	galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
	76	login_page, 'Login GALX parameter')
	77
	78	# Log in
	79	login_form_strs = {
	80	'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
	81	'Email': username,
	82	'GALX': galx,
	83	'Passwd': password,
	84
	85	'PersistentCookie': 'yes',
	86	'_utf8': '霱',
	87	'bgresponse': 'js_disabled',
	88	'checkConnection': '',
	89	'checkedDomains': 'youtube',
	90	'dnConn': '',
	91	'pstMsg': '0',
	92	'rmShown': '1',
	93	'secTok': '',
	94	'signIn': 'Sign in',
	95	'timeStmp': '',
	96	'service': 'youtube',
	97	'uilel': '3',
	98	'hl': 'en_US',
	99	}
	100
	101	# Convert to UTF-8 before urlencode because Python 2.x's urlencode
	102	# chokes on unicode
	103	login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
	104	login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
	105
	106	req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
	107	login_results = self._download_webpage(
	108	req, None,
	109	note='Logging in', errnote='unable to log in', fatal=False)
	110	if login_results is False:
	111	return False
	112
	113	if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
	114	raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
	115
	116	# Two-Factor
	117	# TODO add SMS and phone call support - these require making a request and then prompting the user
	118
	119	if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
	120	tfa_code = self._get_tfa_info()
	121
	122	if tfa_code is None:
	123	self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
	124	self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
	125	return False
	126
	127	# Unlike the first login form, secTok and timeStmp are both required for the TFA form
	128
	129	match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M \| re.U)
	130	if match is None:
	131	self._downloader.report_warning('Failed to get secTok - did the page structure change?')
	132	secTok = match.group(1)
	133	match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M \| re.U)
	134	if match is None:
	135	self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
	136	timeStmp = match.group(1)
	137
	138	tfa_form_strs = {
	139	'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
	140	'smsToken': '',
	141	'smsUserPin': tfa_code,
	142	'smsVerifyPin': 'Verify',
	143
	144	'PersistentCookie': 'yes',
	145	'checkConnection': '',
	146	'checkedDomains': 'youtube',
	147	'pstMsg': '1',
	148	'secTok': secTok,
	149	'timeStmp': timeStmp,
	150	'service': 'youtube',
	151	'hl': 'en_US',
	152	}
	153	tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
	154	tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
	155
	156	tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
	157	tfa_results = self._download_webpage(
	158	tfa_req, None,
	159	note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
	160
	161	if tfa_results is False:
	162	return False
	163
	164	if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
	165	self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
	166	return False
	167	if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
	168	self._downloader.report_warning('unable to log in - did the page structure change?')
	169	return False
	170	if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
	171	self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
	172	return False
	173
	174	if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
	175	self._downloader.report_warning('unable to log in: bad username or password')
	176	return False
	177	return True
	178
	179	def _real_initialize(self):
	180	if self._downloader is None:
	181	return
	182	self._set_language()
	183	if not self._login():
	184	return
	185
	186
	187	class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
	188	IE_DESC = 'YouTube.com'
	189	_VALID_URL = r"""(?x)^
	190	(
	191	(?:https?://\|//) # http(s):// or protocol-independent URL
	192	(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/\|
	193	(?:www\.)?deturl\.com/www\.youtube\.com/\|
	194	(?:www\.)?pwnyoutube\.com/\|
	195	(?:www\.)?yourepeat\.com/\|
	196	tube\.majestyc\.net/\|
	197	youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
	198	(?:.*?\#/)? # handle anchor (#/) redirect urls
	199	(?: # the various things that can precede the ID:
	200	(?:(?:v\|embed\|e)/(?!videoseries)) # v/ or embed/ or e/
	201	\|(?: # or the v= param in all its forms
	202	(?:(?:watch\|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup\|.php) or nothing (like /?v=xxxx)
	203	(?:\?\|\#!?) # the params delimiter ? or # or #!
	204	(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
	205	v=
	206	)
	207	))
	208	\|youtu\.be/ # just youtu.be/xxxx
	209	\|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
	210	)
	211	)? # all until now is optional -> you can pass the naked ID
	212	([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
	213	(?!.*?&list=) # combined list/video URLs are handled by the playlist IE
	214	(?(1).+)? # if we found the ID, everything can follow
	215	$"""
	216	_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
	217	_formats = {
	218	'5': {'ext': 'flv', 'width': 400, 'height': 240},
	219	'6': {'ext': 'flv', 'width': 450, 'height': 270},
	220	'13': {'ext': '3gp'},
	221	'17': {'ext': '3gp', 'width': 176, 'height': 144},
	222	'18': {'ext': 'mp4', 'width': 640, 'height': 360},
	223	'22': {'ext': 'mp4', 'width': 1280, 'height': 720},
	224	'34': {'ext': 'flv', 'width': 640, 'height': 360},
	225	'35': {'ext': 'flv', 'width': 854, 'height': 480},
	226	'36': {'ext': '3gp', 'width': 320, 'height': 240},
	227	'37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
	228	'38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
	229	'43': {'ext': 'webm', 'width': 640, 'height': 360},
	230	'44': {'ext': 'webm', 'width': 854, 'height': 480},
	231	'45': {'ext': 'webm', 'width': 1280, 'height': 720},
	232	'46': {'ext': 'webm', 'width': 1920, 'height': 1080},
	233
	234
	235	# 3d videos
	236	'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
	237	'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
	238	'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
	239	'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
	240	'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
	241	'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
	242	'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
	243
	244	# Apple HTTP Live Streaming
	245	'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
	246	'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
	247	'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
	248	'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
	249	'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
	250	'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
	251	'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
	252
	253	# DASH mp4 video
	254	'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	255	'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	256	'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	257	'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	258	'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	259	'138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
	260	'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	261	'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	262	'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
	263	'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
	264	'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
	265
	266	# Dash mp4 audio
	267	'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
	268	'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
	269	'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
	270
	271	# Dash webm
	272	'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	273	'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	274	'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	275	'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	276	'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	277	'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	278	'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'VP9'},
	279	'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	280	'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	281	'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	282	'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	283	'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	284	'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	285	'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	286	'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	287	'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	288	'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
	289	'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
	290	'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
	291	'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
	292	'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
	293
	294	# Dash webm audio
	295	'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
	296	'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
	297
	298	# Dash webm audio with opus inside
	299	'249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
	300	'250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
	301	'251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
	302
	303	# RTMP (unnamed)
	304	'_rtmp': {'protocol': 'rtmp'},
	305	}
	306
	307	IE_NAME = 'youtube'
	308	_TESTS = [
	309	{
	310	'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
	311	'info_dict': {
	312	'id': 'BaW_jenozKc',
	313	'ext': 'mp4',
	314	'title': 'youtube-dl test video "\'/\\ä↭𝕐',
	315	'uploader': 'Philipp Hagemeister',
	316	'uploader_id': 'phihag',
	317	'upload_date': '20121002',
	318	'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
	319	'categories': ['Science & Technology'],
	320	'like_count': int,
	321	'dislike_count': int,
	322	}
	323	},
	324	{
	325	'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
	326	'note': 'Test generic use_cipher_signature video (#897)',
	327	'info_dict': {
	328	'id': 'UxxajLWwzqY',
	329	'ext': 'mp4',
	330	'upload_date': '20120506',
	331	'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
	332	'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
	333	'uploader': 'Icona Pop',
	334	'uploader_id': 'IconaPop',
	335	}
	336	},
	337	{
	338	'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
	339	'note': 'Test VEVO video with age protection (#956)',
	340	'info_dict': {
	341	'id': '07FYdnEawAQ',
	342	'ext': 'mp4',
	343	'upload_date': '20130703',
	344	'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
	345	'description': 'md5:64249768eec3bc4276236606ea996373',
	346	'uploader': 'justintimberlakeVEVO',
	347	'uploader_id': 'justintimberlakeVEVO',
	348	}
	349	},
	350	{
	351	'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
	352	'note': 'Embed-only video (#1746)',
	353	'info_dict': {
	354	'id': 'yZIXLfi8CZQ',
	355	'ext': 'mp4',
	356	'upload_date': '20120608',
	357	'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
	358	'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
	359	'uploader': 'SET India',
	360	'uploader_id': 'setindia'
	361	}
	362	},
	363	{
	364	'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
	365	'note': '256k DASH audio (format 141) via DASH manifest',
	366	'info_dict': {
	367	'id': 'a9LDPn-MO4I',
	368	'ext': 'm4a',
	369	'upload_date': '20121002',
	370	'uploader_id': '8KVIDEO',
	371	'description': '',
	372	'uploader': '8KVIDEO',
	373	'title': 'UHDTV TEST 8K VIDEO.mp4'
	374	},
	375	'params': {
	376	'youtube_include_dash_manifest': True,
	377	'format': '141',
	378	},
	379	},
	380	# DASH manifest with encrypted signature
	381	{
	382	'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
	383	'info_dict': {
	384	'id': 'IB3lcPjvWLA',
	385	'ext': 'm4a',
	386	'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
	387	'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
	388	'uploader': 'AfrojackVEVO',
	389	'uploader_id': 'AfrojackVEVO',
	390	'upload_date': '20131011',
	391	},
	392	'params': {
	393	'youtube_include_dash_manifest': True,
	394	'format': '141',
	395	},
	396	},
	397	# JS player signature function name containing $
	398	{
	399	'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
	400	'info_dict': {
	401	'id': 'nfWlot6h_JM',
	402	'ext': 'm4a',
	403	'title': 'Taylor Swift - Shake It Off',
	404	'description': 'md5:2acfda1b285bdd478ccec22f9918199d',
	405	'uploader': 'TaylorSwiftVEVO',
	406	'uploader_id': 'TaylorSwiftVEVO',
	407	'upload_date': '20140818',
	408	},
	409	'params': {
	410	'youtube_include_dash_manifest': True,
	411	'format': '141',
	412	},
	413	},
	414	# Controversy video
	415	{
	416	'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
	417	'info_dict': {
	418	'id': 'T4XJQO3qol8',
	419	'ext': 'mp4',
	420	'upload_date': '20100909',
	421	'uploader': 'The Amazing Atheist',
	422	'uploader_id': 'TheAmazingAtheist',
	423	'title': 'Burning Everyone\'s Koran',
	424	'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
	425	}
	426	},
	427	# Normal age-gate video (No vevo, embed allowed)
	428	{
	429	'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
	430	'info_dict': {
	431	'id': 'HtVdAasjOgU',
	432	'ext': 'mp4',
	433	'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
	434	'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
	435	'uploader': 'The Witcher',
	436	'uploader_id': 'WitcherGame',
	437	'upload_date': '20140605',
	438	},
	439	},
	440	# Age-gate video with encrypted signature
	441	{
	442	'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
	443	'info_dict': {
	444	'id': '6kLq3WMV1nU',
	445	'ext': 'mp4',
	446	'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
	447	'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
	448	'uploader': 'LloydVEVO',
	449	'uploader_id': 'LloydVEVO',
	450	'upload_date': '20110629',
	451	},
	452	},
	453	# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
	454	{
	455	'url': '__2ABJjxzNo',
	456	'info_dict': {
	457	'id': '__2ABJjxzNo',
	458	'ext': 'mp4',
	459	'upload_date': '20100430',
	460	'uploader_id': 'deadmau5',
	461	'description': 'md5:12c56784b8032162bb936a5f76d55360',
	462	'uploader': 'deadmau5',
	463	'title': 'Deadmau5 - Some Chords (HD)',
	464	},
	465	'expected_warnings': [
	466	'DASH manifest missing',
	467	]
	468	},
	469	# Olympics (https://github.com/rg3/youtube-dl/issues/4431)
	470	{
	471	'url': 'lqQg6PlCWgI',
	472	'info_dict': {
	473	'id': 'lqQg6PlCWgI',
	474	'ext': 'mp4',
	475	'upload_date': '20120731',
	476	'uploader_id': 'olympic',
	477	'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
	478	'uploader': 'Olympics',
	479	'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
	480	},
	481	'params': {
	482	'skip_download': 'requires avconv',
	483	}
	484	},
	485	# Non-square pixels
	486	{
	487	'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
	488	'info_dict': {
	489	'id': '_b-2C3KPAM0',
	490	'ext': 'mp4',
	491	'stretched_ratio': 16 / 9.,
	492	'upload_date': '20110310',
	493	'uploader_id': 'AllenMeow',
	494	'description': 'made by Wacom from Korea \| 字幕&加油添醋 by TY\'s Allen \| 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
	495	'uploader': '孫艾倫',
	496	'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',
	497	},
	498	}
	499	]
	500
	501	def __init__(self, args, *kwargs):
	502	super(YoutubeIE, self).__init__(args, *kwargs)
	503	self._player_cache = {}
	504
	505	def report_video_info_webpage_download(self, video_id):
	506	"""Report attempt to download video info webpage."""
	507	self.to_screen('%s: Downloading video info webpage' % video_id)
	508
	509	def report_information_extraction(self, video_id):
	510	"""Report attempt to extract video information."""
	511	self.to_screen('%s: Extracting video information' % video_id)
	512
	513	def report_unavailable_format(self, video_id, format):
	514	"""Report extracted video URL."""
	515	self.to_screen('%s: Format %s not available' % (video_id, format))
	516
	517	def report_rtmp_download(self):
	518	"""Indicate the download will use the RTMP protocol."""
	519	self.to_screen('RTMP download detected')
	520
	521	def _signature_cache_id(self, example_sig):
	522	""" Return a string representation of a signature """
	523	return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
	524
	525	def _extract_signature_function(self, video_id, player_url, example_sig):
	526	id_m = re.match(
	527	r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3\|/html5player)?\.(?P<ext>[a-z]+)$',
	528	player_url)
	529	if not id_m:
	530	raise ExtractorError('Cannot identify player %r' % player_url)
	531	player_type = id_m.group('ext')
	532	player_id = id_m.group('id')
	533
	534	# Read from filesystem cache
	535	func_id = '%s_%s_%s' % (
	536	player_type, player_id, self._signature_cache_id(example_sig))
	537	assert os.path.basename(func_id) == func_id
	538
	539	cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
	540	if cache_spec is not None:
	541	return lambda s: ''.join(s[i] for i in cache_spec)
	542
	543	if player_type == 'js':
	544	code = self._download_webpage(
	545	player_url, video_id,
	546	note='Downloading %s player %s' % (player_type, player_id),
	547	errnote='Download of %s failed' % player_url)
	548	res = self._parse_sig_js(code)
	549	elif player_type == 'swf':
	550	urlh = self._request_webpage(
	551	player_url, video_id,
	552	note='Downloading %s player %s' % (player_type, player_id),
	553	errnote='Download of %s failed' % player_url)
	554	code = urlh.read()
	555	res = self._parse_sig_swf(code)
	556	else:
	557	assert False, 'Invalid player type %r' % player_type
	558
	559	if cache_spec is None:
	560	test_string = ''.join(map(compat_chr, range(len(example_sig))))
	561	cache_res = res(test_string)
	562	cache_spec = [ord(c) for c in cache_res]
	563
	564	self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
	565	return res
	566
    def _print_sig_code(self, func, example_sig):
        """Print Python source text equivalent to the signature function.

        func is probed with a string whose i-th character has code point i
        (assuming compat_chr behaves like chr), and the resulting index list
        is rendered as a sum of slice/index expressions on a variable "s".
        The text is sent to self.to_screen(); nothing is returned.
        """
        def gen_sig_code(idxs):
            # Yields expressions such as 's[3]' or 's[5:2:-1]' covering idxs in order.
            def _genslice(start, end, step):
                # Render s[start:end+step:step], leaving out a start of 0, a
                # step of 1, and the end bound when end + step is negative.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            # step is None while no run of consecutive indices is open
            step = None
            # Squelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        # The current run continues
                        continue
                    # The run ended at prev; emit it and start over
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Neighbouring indices open a new ascending/descending run
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # NOTE(review): i is unbound here when idxs has fewer than two items
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
	605
	606	def _parse_sig_js(self, jscode):
	607	funcname = self._search_regex(
	608	r'\.sig\\|\\|([a-zA-Z0-9$]+)\(', jscode,
	609	'Initial JS player signature function name')
	610
	611	jsi = JSInterpreter(jscode)
	612	initial_function = jsi.extract_function(funcname)
	613	return lambda s: initial_function([s])
	614
	615	def _parse_sig_swf(self, file_contents):
	616	swfi = SWFInterpreter(file_contents)
	617	TARGET_CLASSNAME = 'SignatureDecipher'
	618	searched_class = swfi.extract_class(TARGET_CLASSNAME)
	619	initial_function = swfi.extract_function(searched_class, 'decipher')
	620	return lambda s: initial_function([s])
	621
	622	def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
	623	"""Turn the encrypted s field into a working signature"""
	624
	625	if player_url is None:
	626	raise ExtractorError('Cannot decrypt signature without player_url')
	627
	628	if player_url.startswith('//'):
	629	player_url = 'https:' + player_url
	630	try:
	631	player_id = (player_url, self._signature_cache_id(s))
	632	if player_id not in self._player_cache:
	633	func = self._extract_signature_function(
	634	video_id, player_url, s
	635	)
	636	self._player_cache[player_id] = func
	637	func = self._player_cache[player_id]
	638	if self._downloader.params.get('youtube_print_sig_code'):
	639	self._print_sig_code(func, s)
	640	return func(s)
	641	except Exception as e:
	642	tb = traceback.format_exc()
	643	raise ExtractorError(
	644	'Signature extraction failed: ' + tb, cause=e)
	645
	646	def _get_available_subtitles(self, video_id, webpage):
	647	try:
	648	subs_doc = self._download_xml(
	649	'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
	650	video_id, note=False)
	651	except ExtractorError as err:
	652	self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
	653	return {}
	654
	655	sub_lang_list = {}
	656	for track in subs_doc.findall('track'):
	657	lang = track.attrib['lang_code']
	658	if lang in sub_lang_list:
	659	continue
	660	params = compat_urllib_parse.urlencode({
	661	'lang': lang,
	662	'v': video_id,
	663	'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
	664	'name': track.attrib['name'].encode('utf-8'),
	665	})
	666	url = 'https://www.youtube.com/api/timedtext?' + params
	667	sub_lang_list[lang] = url
	668	if not sub_lang_list:
	669	self._downloader.report_warning('video doesn\'t have subtitles')
	670	return {}
	671	return sub_lang_list
	672
	673	def _get_available_automatic_caption(self, video_id, webpage):
	674	"""We need the webpage for getting the captions url, pass it as an
	675	argument to speed up the process."""
	676	sub_format = self._downloader.params.get('subtitlesformat', 'srt')
	677	self.to_screen('%s: Looking for automatic captions' % video_id)
	678	mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
	679	err_msg = 'Couldn\'t find automatic captions for %s' % video_id
	680	if mobj is None:
	681	self._downloader.report_warning(err_msg)
	682	return {}
	683	player_config = json.loads(mobj.group(1))
	684	try:
	685	args = player_config['args']
	686	caption_url = args['ttsurl']
	687	timestamp = args['timestamp']
	688	# We get the available subtitles
	689	list_params = compat_urllib_parse.urlencode({
	690	'type': 'list',
	691	'tlangs': 1,
	692	'asrs': 1,
	693	})
	694	list_url = caption_url + '&' + list_params
	695	caption_list = self._download_xml(list_url, video_id)
	696	original_lang_node = caption_list.find('track')
	697	if original_lang_node is None:
	698	self._downloader.report_warning('Video doesn\'t have automatic captions')
	699	return {}
	700	original_lang = original_lang_node.attrib['lang_code']
	701	caption_kind = original_lang_node.attrib.get('kind', '')
	702
	703	sub_lang_list = {}
	704	for lang_node in caption_list.findall('target'):
	705	sub_lang = lang_node.attrib['lang_code']
	706	params = compat_urllib_parse.urlencode({
	707	'lang': original_lang,
	708	'tlang': sub_lang,
	709	'fmt': sub_format,
	710	'ts': timestamp,
	711	'kind': caption_kind,
	712	})
	713	sub_lang_list[sub_lang] = caption_url + '&' + params
	714	return sub_lang_list
	715	# An extractor error can be raise by the download process if there are
	716	# no automatic captions but there are subtitles
	717	except (KeyError, ExtractorError):
	718	self._downloader.report_warning(err_msg)
	719	return {}
	720
	721	@classmethod
	722	def extract_id(cls, url):
	723	mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
	724	if mobj is None:
	725	raise ExtractorError('Invalid URL: %s' % url)
	726	video_id = mobj.group(2)
	727	return video_id
	728
	729	def _extract_from_m3u8(self, manifest_url, video_id):
	730	url_map = {}
	731
	732	def _get_urls(_manifest):
	733	lines = _manifest.split('\n')
	734	urls = filter(lambda l: l and not l.startswith('#'),
	735	lines)
	736	return urls
	737	manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
	738	formats_urls = _get_urls(manifest)
	739	for format_url in formats_urls:
	740	itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
	741	url_map[itag] = format_url
	742	return url_map
	743
	744	def _extract_annotations(self, video_id):
	745	url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
	746	return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
	747
	748	def _parse_dash_manifest(
	749	self, video_id, dash_manifest_url, player_url, age_gate):
	750	def decrypt_sig(mobj):
	751	s = mobj.group(1)
	752	dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
	753	return '/signature/%s' % dec_s
	754	dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
	755	dash_doc = self._download_xml(
	756	dash_manifest_url, video_id,
	757	note='Downloading DASH manifest',
	758	errnote='Could not download DASH manifest')
	759
	760	formats = []
	761	for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
	762	url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
	763	if url_el is None:
	764	continue
	765	format_id = r.attrib['id']
	766	video_url = url_el.text
	767	filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
	768	f = {
	769	'format_id': format_id,
	770	'url': video_url,
	771	'width': int_or_none(r.attrib.get('width')),
	772	'height': int_or_none(r.attrib.get('height')),
	773	'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
	774	'asr': int_or_none(r.attrib.get('audioSamplingRate')),
	775	'filesize': filesize,
	776	'fps': int_or_none(r.attrib.get('frameRate')),
	777	}
	778	try:
	779	existing_format = next(
	780	fo for fo in formats
	781	if fo['format_id'] == format_id)
	782	except StopIteration:
	783	f.update(self._formats.get(format_id, {}).items())
	784	formats.append(f)
	785	else:
	786	existing_format.update(f)
	787	return formats
	788
	789	def _real_extract(self, url):
	790	proto = (
	791	'http' if self._downloader.params.get('prefer_insecure', False)
	792	else 'https')
	793
	794	# Extract original video URL from URL with redirection, like age verification, using next_url parameter
	795	mobj = re.search(self._NEXT_URL_RE, url)
	796	if mobj:
	797	url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
	798	video_id = self.extract_id(url)
	799
	800	# Get video webpage
	801	url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
	802	video_webpage = self._download_webpage(url, video_id)
	803
	804	# Attempt to extract SWF player URL
	805	mobj = re.search(r'swfConfig.?"(https?:\\/\\/.?watch.?-.?\.swf)"', video_webpage)
	806	if mobj is not None:
	807	player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
	808	else:
	809	player_url = None
	810
	811	# Get video info
	812	if re.search(r'player-age-gate-content">', video_webpage) is not None:
	813	age_gate = True
	814	# We simulate the access to the video from www.youtube.com/v/{video_id}
	815	# this can be viewed without login into Youtube
	816	url = proto + '://www.youtube.com/embed/%s' % video_id
	817	embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
	818	data = compat_urllib_parse.urlencode({
	819	'video_id': video_id,
	820	'eurl': 'https://youtube.googleapis.com/v/' + video_id,
	821	'sts': self._search_regex(
	822	r'"sts"\s:\s(\d+)', embed_webpage, 'sts', default=''),
	823	})
	824	video_info_url = proto + '://www.youtube.com/get_video_info?' + data
	825	video_info_webpage = self._download_webpage(
	826	video_info_url, video_id,
	827	note='Refetching age-gated info webpage',
	828	errnote='unable to download video info webpage')
	829	video_info = compat_parse_qs(video_info_webpage)
	830	else:
	831	age_gate = False
	832	try:
	833	# Try looking directly into the video webpage
	834	mobj = re.search(r';ytplayer\.config\s=\s({.*?});', video_webpage)
	835	if not mobj:
	836	raise ValueError('Could not find ytplayer.config') # caught below
	837	json_code = uppercase_escape(mobj.group(1))
	838	ytplayer_config = json.loads(json_code)
	839	args = ytplayer_config['args']
	840	# Convert to the same format returned by compat_parse_qs
	841	video_info = dict((k, [v]) for k, v in args.items())
	842	if 'url_encoded_fmt_stream_map' not in args:
	843	raise ValueError('No stream_map present') # caught below
	844	except ValueError:
	845	# We fallback to the get_video_info pages (used by the embed page)
	846	self.report_video_info_webpage_download(video_id)
	847	for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
	848	video_info_url = (
	849	'%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
	850	% (proto, video_id, el_type))
	851	video_info_webpage = self._download_webpage(
	852	video_info_url,
	853	video_id, note=False,
	854	errnote='unable to download video info webpage')
	855	video_info = compat_parse_qs(video_info_webpage)
	856	if 'token' in video_info:
	857	break
	858	if 'token' not in video_info:
	859	if 'reason' in video_info:
	860	raise ExtractorError(
	861	'YouTube said: %s' % video_info['reason'][0],
	862	expected=True, video_id=video_id)
	863	else:
	864	raise ExtractorError(
	865	'"token" parameter not in video info for unknown reason',
	866	video_id=video_id)
	867
	868	if 'view_count' in video_info:
	869	view_count = int(video_info['view_count'][0])
	870	else:
	871	view_count = None
	872
	873	# Check for "rental" videos
	874	if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
	875	raise ExtractorError('"rental" videos not supported')
	876
	877	# Start extracting information
	878	self.report_information_extraction(video_id)
	879
	880	# uploader
	881	if 'author' not in video_info:
	882	raise ExtractorError('Unable to extract uploader name')
	883	video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
	884
	885	# uploader_id
	886	video_uploader_id = None
	887	mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user\|channel)/([^"]+)">', video_webpage)
	888	if mobj is not None:
	889	video_uploader_id = mobj.group(1)
	890	else:
	891	self._downloader.report_warning('unable to extract uploader nickname')
	892
	893	# title
	894	if 'title' in video_info:
	895	video_title = video_info['title'][0]
	896	else:
	897	self._downloader.report_warning('Unable to extract video title')
	898	video_title = '_'
	899
	900	# thumbnail image
	901	# We try first to get a high quality image:
	902	m_thumb = re.search(r'<span itemprop="thumbnail".?href="(.?)">',
	903	video_webpage, re.DOTALL)
	904	if m_thumb is not None:
	905	video_thumbnail = m_thumb.group(1)
	906	elif 'thumbnail_url' not in video_info:
	907	self._downloader.report_warning('unable to extract video thumbnail')
	908	video_thumbnail = None
	909	else: # don't panic if we can't find it
	910	video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
	911
	912	# upload date
	913	upload_date = None
	914	mobj = re.search(r'(?s)id="eow-date.?>(.?)</span>', video_webpage)
	915	if mobj is None:
	916	mobj = re.search(
	917	r'(?s)id="watch-uploader-info".?>.?(?:Published\|Uploaded\|Streamed live) on (.*?)</strong>',
	918	video_webpage)
	919	if mobj is not None:
	920	upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
	921	upload_date = unified_strdate(upload_date)
	922
	923	m_cat_container = self._search_regex(
	924	r'(?s)<h4[^>]>\sCategory\s</h4>\s<ul[^>]>(.?)</ul>',
	925	video_webpage, 'categories', default=None)
	926	if m_cat_container:
	927	category = self._html_search_regex(
	928	r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
	929	default=None)
	930	video_categories = None if category is None else [category]
	931	else:
	932	video_categories = None
	933
	934	# description
	935	video_description = get_element_by_id("eow-description", video_webpage)
	936	if video_description:
	937	video_description = re.sub(r'''(?x)
	938	<a\s+
	939	(?:[a-zA-Z-]+="[^"]+"\s+)*?
	940	title="([^"]+)"\s+
	941	(?:[a-zA-Z-]+="[^"]+"\s+)*?
	942	class="yt-uix-redirect-link"\s*>
	943	[^<]+
	944	</a>
	945	''', r'\1', video_description)
	946	video_description = clean_html(video_description)
	947	else:
	948	fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
	949	if fd_mobj:
	950	video_description = unescapeHTML(fd_mobj.group(1))
	951	else:
	952	video_description = ''
	953
	954	def _extract_count(count_name):
	955	count = self._search_regex(
	956	r'id="watch-%s"[^>]>.?([\d,]+)\s*</span>' % re.escape(count_name),
	957	video_webpage, count_name, default=None)
	958	if count is not None:
	959	return int(count.replace(',', ''))
	960	return None
	961	like_count = _extract_count('like')
	962	dislike_count = _extract_count('dislike')
	963
	964	# subtitles
	965	video_subtitles = self.extract_subtitles(video_id, video_webpage)
	966
	967	if self._downloader.params.get('listsubtitles', False):
	968	self._list_available_subtitles(video_id, video_webpage)
	969	return
	970
	971	if 'length_seconds' not in video_info:
	972	self._downloader.report_warning('unable to extract video duration')
	973	video_duration = None
	974	else:
	975	video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
	976
	977	# annotations
	978	video_annotations = None
	979	if self._downloader.params.get('writeannotations', False):
	980	video_annotations = self._extract_annotations(video_id)
	981
	982	def _map_to_format_list(urlmap):
	983	formats = []
	984	for itag, video_real_url in urlmap.items():
	985	dct = {
	986	'format_id': itag,
	987	'url': video_real_url,
	988	'player_url': player_url,
	989	}
	990	if itag in self._formats:
	991	dct.update(self._formats[itag])
	992	formats.append(dct)
	993	return formats
	994
	995	if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
	996	self.report_rtmp_download()
	997	formats = [{
	998	'format_id': '_rtmp',
	999	'protocol': 'rtmp',
	1000	'url': video_info['conn'][0],
	1001	'player_url': player_url,
	1002	}]
	1003	elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
	1004	encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
	1005	if 'rtmpe%3Dyes' in encoded_url_map:
	1006	raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
	1007	url_map = {}
	1008	for url_data_str in encoded_url_map.split(','):
	1009	url_data = compat_parse_qs(url_data_str)
	1010	if 'itag' not in url_data or 'url' not in url_data:
	1011	continue
	1012	format_id = url_data['itag'][0]
	1013	url = url_data['url'][0]
	1014
	1015	if 'sig' in url_data:
	1016	url += '&signature=' + url_data['sig'][0]
	1017	elif 's' in url_data:
	1018	encrypted_sig = url_data['s'][0]
	1019
	1020	jsplayer_url_json = self._search_regex(
	1021	r'"assets":.+?"js":\s*("[^"]+")',
	1022	embed_webpage if age_gate else video_webpage, 'JS player URL')
	1023	player_url = json.loads(jsplayer_url_json)
	1024	if player_url is None:
	1025	player_url_json = self._search_regex(
	1026	r'ytplayer\.config.?"url"\s:\s*("[^"]+")',
	1027	video_webpage, 'age gate player URL')
	1028	player_url = json.loads(player_url_json)
	1029
	1030	if self._downloader.params.get('verbose'):
	1031	if player_url is None:
	1032	player_version = 'unknown'
	1033	player_desc = 'unknown'
	1034	else:
	1035	if player_url.endswith('swf'):
	1036	player_version = self._search_regex(
	1037	r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
	1038	'flash player', fatal=False)
	1039	player_desc = 'flash player %s' % player_version
	1040	else:
	1041	player_version = self._search_regex(
	1042	r'html5player-([^/]+?)(?:/html5player)?\.js',
	1043	player_url,
	1044	'html5 player', fatal=False)
	1045	player_desc = 'html5 player %s' % player_version
	1046
	1047	parts_sizes = self._signature_cache_id(encrypted_sig)
	1048	self.to_screen('{%s} signature length %s, %s' %
	1049	(format_id, parts_sizes, player_desc))
	1050
	1051	signature = self._decrypt_signature(
	1052	encrypted_sig, video_id, player_url, age_gate)
	1053	url += '&signature=' + signature
	1054	if 'ratebypass' not in url:
	1055	url += '&ratebypass=yes'
	1056	url_map[format_id] = url
	1057	formats = _map_to_format_list(url_map)
	1058	elif video_info.get('hlsvp'):
	1059	manifest_url = video_info['hlsvp'][0]
	1060	url_map = self._extract_from_m3u8(manifest_url, video_id)
	1061	formats = _map_to_format_list(url_map)
	1062	else:
	1063	raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
	1064
	1065	# Look for the DASH manifest
	1066	if self._downloader.params.get('youtube_include_dash_manifest', True):
	1067	dash_mpd = video_info.get('dashmpd')
	1068	if dash_mpd:
	1069	dash_manifest_url = dash_mpd[0]
	1070	try:
	1071	dash_formats = self._parse_dash_manifest(
	1072	video_id, dash_manifest_url, player_url, age_gate)
	1073	except (ExtractorError, KeyError) as e:
	1074	self.report_warning(
	1075	'Skipping DASH manifest: %r' % e, video_id)
	1076	else:
	1077	# Hide the formats we found through non-DASH
	1078	dash_keys = set(df['format_id'] for df in dash_formats)
	1079	for f in formats:
	1080	if f['format_id'] in dash_keys:
	1081	f['format_id'] = 'nondash-%s' % f['format_id']
	1082	f['preference'] = f.get('preference', 0) - 10000
	1083	formats.extend(dash_formats)
	1084
	1085	# Check for malformed aspect ratio
	1086	stretched_m = re.search(
	1087	r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
	1088	video_webpage)
	1089	if stretched_m:
	1090	ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
	1091	for f in formats:
	1092	if f.get('vcodec') != 'none':
	1093	f['stretched_ratio'] = ratio
	1094
	1095	self._sort_formats(formats)
	1096
	1097	return {
	1098	'id': video_id,
	1099	'uploader': video_uploader,
	1100	'uploader_id': video_uploader_id,
	1101	'upload_date': upload_date,
	1102	'title': video_title,
	1103	'thumbnail': video_thumbnail,
	1104	'description': video_description,
	1105	'categories': video_categories,
	1106	'subtitles': video_subtitles,
	1107	'duration': video_duration,
	1108	'age_limit': 18 if age_gate else 0,
	1109	'annotations': video_annotations,
	1110	'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
	1111	'view_count': view_count,
	1112	'like_count': like_count,
	1113	'dislike_count': dislike_count,
	1114	'formats': formats,
	1115	}
	1116
	1117
	1118	class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
	1119	IE_DESC = 'YouTube.com playlists'
	1120	_VALID_URL = r"""(?x)(?:
	1121	(?:https?://)?
	1122	(?:\w+\.)?
	1123	youtube\.com/
	1124	(?:
	1125	(?:course\|view_play_list\|my_playlists\|artist\|playlist\|watch\|embed/videoseries)
	1126	\? (?:.?&)? (?:p\|a\|list)=
	1127	\| p/
	1128	)
	1129	(
	1130	(?:PL\|LL\|EC\|UU\|FL\|RD)?[0-9A-Za-z-_]{10,}
	1131	# Top tracks, they can also include dots
	1132	\|(?:MC)[\w\.]*
	1133	)
	1134	.*
	1135	\|
	1136	((?:PL\|LL\|EC\|UU\|FL\|RD)[0-9A-Za-z-_]{10,})
	1137	)"""
	1138	_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
	1139	_VIDEO_RE = r'href="\s/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]?index=(?P<index>\d+)'
	1140	IE_NAME = 'youtube:playlist'
	1141	_TESTS = [{
	1142	'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
	1143	'info_dict': {
	1144	'title': 'ytdl test PL',
	1145	'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
	1146	},
	1147	'playlist_count': 3,
	1148	}, {
	1149	'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
	1150	'info_dict': {
	1151	'title': 'YDL_Empty_List',
	1152	},
	1153	'playlist_count': 0,
	1154	}, {
	1155	'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
	1156	'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
	1157	'info_dict': {
	1158	'title': '29C3: Not my department',
	1159	},
	1160	'playlist_count': 95,
	1161	}, {
	1162	'note': 'issue #673',
	1163	'url': 'PLBB231211A4F62143',
	1164	'info_dict': {
	1165	'title': '[OLD]Team Fortress 2 (Class-based LP)',
	1166	},
	1167	'playlist_mincount': 26,
	1168	}, {
	1169	'note': 'Large playlist',
	1170	'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
	1171	'info_dict': {
	1172	'title': 'Uploads from Cauchemar',
	1173	},
	1174	'playlist_mincount': 799,
	1175	}, {
	1176	'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
	1177	'info_dict': {
	1178	'title': 'YDL_safe_search',
	1179	},
	1180	'playlist_count': 2,
	1181	}, {
	1182	'note': 'embedded',
	1183	'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
	1184	'playlist_count': 4,
	1185	'info_dict': {
	1186	'title': 'JODA15',
	1187	}
	1188	}, {
	1189	'note': 'Embedded SWF player',
	1190	'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
	1191	'playlist_count': 4,
	1192	'info_dict': {
	1193	'title': 'JODA7',
	1194	}
	1195	}, {
	1196	'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
	1197	'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
	1198	'info_dict': {
	1199	'title': 'Uploads from Interstellar Movie',
	1200	},
	1201	'playlist_mincout': 21,
	1202	}]
	1203
	1204	def _real_initialize(self):
	1205	self._login()
	1206
	1207	def _ids_to_results(self, ids):
	1208	return [
	1209	self.url_result(vid_id, 'Youtube', video_id=vid_id)
	1210	for vid_id in ids]
	1211
	1212	def _extract_mix(self, playlist_id):
	1213	# The mixes are generated from a a single video
	1214	# the id of the playlist is just 'RD' + video_id
	1215	url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
	1216	webpage = self._download_webpage(
	1217	url, playlist_id, 'Downloading Youtube mix')
	1218	search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
	1219	title_span = (
	1220	search_title('playlist-title') or
	1221	search_title('title long-title') or
	1222	search_title('title'))
	1223	title = clean_html(title_span)
	1224	ids = orderedSet(re.findall(
	1225	r'''(?xs)data-video-username=".?".?
	1226	href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
	1227	webpage))
	1228	url_results = self._ids_to_results(ids)
	1229
	1230	return self.playlist_result(url_results, playlist_id, title)
	1231
	1232	def _real_extract(self, url):
	1233	# Extract playlist id
	1234	mobj = re.match(self._VALID_URL, url)
	1235	if mobj is None:
	1236	raise ExtractorError('Invalid URL: %s' % url)
	1237	playlist_id = mobj.group(1) or mobj.group(2)
	1238
	1239	# Check if it's a video-specific URL
	1240	query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
	1241	if 'v' in query_dict:
	1242	video_id = query_dict['v'][0]
	1243	if self._downloader.params.get('noplaylist'):
	1244	self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
	1245	return self.url_result(video_id, 'Youtube', video_id=video_id)
	1246	else:
	1247	self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
	1248
	1249	if playlist_id.startswith('RD'):
	1250	# Mixes require a custom extraction process
	1251	return self._extract_mix(playlist_id)
	1252
	1253	url = self._TEMPLATE_URL % playlist_id
	1254	page = self._download_webpage(url, playlist_id)
	1255	more_widget_html = content_html = page
	1256
	1257	# Check if the playlist exists or is private
	1258	if re.search(r'<div class="yt-alert-message">[^<]?(The\|This) playlist (does not exist\|is private)[^<]?</div>', page) is not None:
	1259	raise ExtractorError(
	1260	'The playlist doesn\'t exist or is private, use --username or '
	1261	'--netrc to access it.',
	1262	expected=True)
	1263
	1264	# Extract the video ids from the playlist pages
	1265	ids = []
	1266
	1267	for page_num in itertools.count(1):
	1268	matches = re.finditer(self._VIDEO_RE, content_html)
	1269	# We remove the duplicates and the link with index 0
	1270	# (it's not the first video of the playlist)
	1271	new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
	1272	ids.extend(new_ids)
	1273
	1274	mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
	1275	if not mobj:
	1276	break
	1277
	1278	more = self._download_json(
	1279	'https://youtube.com/%s' % mobj.group('more'), playlist_id,
	1280	'Downloading page #%s' % page_num,
	1281	transform_source=uppercase_escape)
	1282	content_html = more['content_html']
	1283	if not content_html.strip():
	1284	# Some webpages show a "Load more" button but they don't
	1285	# have more videos
	1286	break
	1287	more_widget_html = more['load_more_widget_html']
	1288
	1289	playlist_title = self._html_search_regex(
	1290	r'(?s)<h1 class="pl-header-title[^"]">\s(.?)\s</h1>',
	1291	page, 'title')
	1292
	1293	url_results = self._ids_to_results(ids)
	1294	return self.playlist_result(url_results, playlist_id, playlist_title)
	1295
	1296
	1297	class YoutubeChannelIE(InfoExtractor):
	1298	IE_DESC = 'YouTube.com channels'
	1299	_VALID_URL = r'https?://(?:youtu\.be\|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
	1300	IE_NAME = 'youtube:channel'
	1301	_TESTS = [{
	1302	'note': 'paginated channel',
	1303	'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
	1304	'playlist_mincount': 91,
	1305	}]
	1306
	1307	def extract_videos_from_page(self, page):
	1308	ids_in_page = []
	1309	for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
	1310	if mobj.group(1) not in ids_in_page:
	1311	ids_in_page.append(mobj.group(1))
	1312	return ids_in_page
	1313
	1314	def _real_extract(self, url):
	1315	channel_id = self._match_id(url)
	1316
	1317	video_ids = []
	1318	url = 'https://www.youtube.com/channel/%s/videos' % channel_id
	1319	channel_page = self._download_webpage(url, channel_id)
	1320	autogenerated = re.search(r'''(?x)
	1321	class="[^"]*?(?:
	1322	channel-header-autogenerated-label\|
	1323	yt-channel-title-autogenerated
	1324	)[^"]*"''', channel_page) is not None
	1325
	1326	if autogenerated:
	1327	# The videos are contained in a single page
	1328	# the ajax pages can't be used, they are empty
	1329	video_ids = self.extract_videos_from_page(channel_page)
	1330	entries = [
	1331	self.url_result(video_id, 'Youtube', video_id=video_id)
	1332	for video_id in video_ids]
	1333	return self.playlist_result(entries, channel_id)
	1334
	1335	def _entries():
	1336	more_widget_html = content_html = channel_page
	1337	for pagenum in itertools.count(1):
	1338
	1339	ids_in_page = self.extract_videos_from_page(content_html)
	1340	for video_id in ids_in_page:
	1341	yield self.url_result(
	1342	video_id, 'Youtube', video_id=video_id)
	1343
	1344	mobj = re.search(
	1345	r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
	1346	more_widget_html)
	1347	if not mobj:
	1348	break
	1349
	1350	more = self._download_json(
	1351	'https://youtube.com/%s' % mobj.group('more'), channel_id,
	1352	'Downloading page #%s' % (pagenum + 1),
	1353	transform_source=uppercase_escape)
	1354	content_html = more['content_html']
	1355	more_widget_html = more['load_more_widget_html']
	1356
	1357	return self.playlist_result(_entries(), channel_id)
	1358
	1359
	1360	class YoutubeUserIE(InfoExtractor):
	1361	IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
	1362	_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link\|watch\|results)(?:$\|[^a-z_A-Z0-9-])))\|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
	1363	_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
	1364	_GDATA_PAGE_SIZE = 50
	1365	_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
	1366	IE_NAME = 'youtube:user'
	1367
	1368	_TESTS = [{
	1369	'url': 'https://www.youtube.com/user/TheLinuxFoundation',
	1370	'playlist_mincount': 320,
	1371	'info_dict': {
	1372	'title': 'TheLinuxFoundation',
	1373	}
	1374	}, {
	1375	'url': 'ytuser:phihag',
	1376	'only_matching': True,
	1377	}]
	1378
	1379	@classmethod
	1380	def suitable(cls, url):
	1381	# Don't return True if the url can be extracted with other youtube
	1382	# extractor, the regex would is too permissive and it would match.
	1383	other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
	1384	if any(ie.suitable(url) for ie in other_ies):
	1385	return False
	1386	else:
	1387	return super(YoutubeUserIE, cls).suitable(url)
	1388
	1389	def _real_extract(self, url):
	1390	username = self._match_id(url)
	1391
	1392	# Download video ids using YouTube Data API. Result size per
	1393	# query is limited (currently to 50 videos) so we need to query
	1394	# page by page until there are no video ids - it means we got
	1395	# all of them.
	1396
	1397	def download_page(pagenum):
	1398	start_index = pagenum * self._GDATA_PAGE_SIZE + 1
	1399
	1400	gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
	1401	page = self._download_webpage(
	1402	gdata_url, username,
	1403	'Downloading video ids from %d to %d' % (
	1404	start_index, start_index + self._GDATA_PAGE_SIZE))
	1405
	1406	try:
	1407	response = json.loads(page)
	1408	except ValueError as err:
	1409	raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
	1410	if 'entry' not in response['feed']:
	1411	return
	1412
	1413	# Extract video identifiers
	1414	entries = response['feed']['entry']
	1415	for entry in entries:
	1416	title = entry['title']['$t']
	1417	video_id = entry['id']['$t'].split('/')[-1]
	1418	yield {
	1419	'_type': 'url',
	1420	'url': video_id,
	1421	'ie_key': 'Youtube',
	1422	'id': video_id,
	1423	'title': title,
	1424	}
	1425	url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE)
	1426
	1427	return self.playlist_result(url_results, playlist_title=username)
	1428
	1429
	1430	class YoutubeSearchIE(SearchInfoExtractor):
	1431	IE_DESC = 'YouTube.com searches'
	1432	_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
	1433	_MAX_RESULTS = 1000
	1434	IE_NAME = 'youtube:search'
	1435	_SEARCH_KEY = 'ytsearch'
	1436
	1437	def _get_n_results(self, query, n):
	1438	"""Get a specified number of results for a query"""
	1439
	1440	video_ids = []
	1441	pagenum = 0
	1442	limit = n
	1443	PAGE_SIZE = 50
	1444
	1445	while (PAGE_SIZE * pagenum) < limit:
	1446	result_url = self._API_URL % (
	1447	compat_urllib_parse.quote_plus(query.encode('utf-8')),
	1448	(PAGE_SIZE * pagenum) + 1)
	1449	data_json = self._download_webpage(
	1450	result_url, video_id='query "%s"' % query,
	1451	note='Downloading page %s' % (pagenum + 1),
	1452	errnote='Unable to download API page')
	1453	data = json.loads(data_json)
	1454	api_response = data['data']
	1455
	1456	if 'items' not in api_response:
	1457	raise ExtractorError(
	1458	'[youtube] No video results', expected=True)
	1459
	1460	new_ids = list(video['id'] for video in api_response['items'])
	1461	video_ids += new_ids
	1462
	1463	limit = min(n, api_response['totalItems'])
	1464	pagenum += 1
	1465
	1466	if len(video_ids) > n:
	1467	video_ids = video_ids[:n]
	1468	videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
	1469	for video_id in video_ids]
	1470	return self.playlist_result(videos, query)
	1471
	1472
	1473	class YoutubeSearchDateIE(YoutubeSearchIE):
	1474	IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
	1475	_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
	1476	_SEARCH_KEY = 'ytsearchdate'
	1477	IE_DESC = 'YouTube.com searches, newest videos first'
	1478
	1479
	1480	class YoutubeSearchURLIE(InfoExtractor):
	1481	IE_DESC = 'YouTube.com search URLs'
	1482	IE_NAME = 'youtube:search_url'
	1483	_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]\|$)'
	1484	_TESTS = [{
	1485	'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
	1486	'playlist_mincount': 5,
	1487	'info_dict': {
	1488	'title': 'youtube-dl test video',
	1489	}
	1490	}]
	1491
	1492	def _real_extract(self, url):
	1493	mobj = re.match(self._VALID_URL, url)
	1494	query = compat_urllib_parse.unquote_plus(mobj.group('query'))
	1495
	1496	webpage = self._download_webpage(url, query)
	1497	result_code = self._search_regex(
	1498	r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')
	1499
	1500	part_codes = re.findall(
	1501	r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
	1502	entries = []
	1503	for part_code in part_codes:
	1504	part_title = self._html_search_regex(
	1505	[r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
	1506	part_url_snippet = self._html_search_regex(
	1507	r'(?s)href="([^"]+)"', part_code, 'item URL')
	1508	part_url = compat_urlparse.urljoin(
	1509	'https://www.youtube.com/', part_url_snippet)
	1510	entries.append({
	1511	'_type': 'url',
	1512	'url': part_url,
	1513	'title': part_title,
	1514	})
	1515
	1516	return {
	1517	'_type': 'playlist',
	1518	'entries': entries,
	1519	'title': query,
	1520	}
	1521
	1522
	1523	class YoutubeShowIE(InfoExtractor):
	1524	IE_DESC = 'YouTube.com (multi-season) shows'
	1525	_VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
	1526	IE_NAME = 'youtube:show'
	1527	_TESTS = [{
	1528	'url': 'http://www.youtube.com/show/airdisasters',
	1529	'playlist_mincount': 3,
	1530	'info_dict': {
	1531	'id': 'airdisasters',
	1532	'title': 'Air Disasters',
	1533	}
	1534	}]
	1535
	1536	def _real_extract(self, url):
	1537	mobj = re.match(self._VALID_URL, url)
	1538	playlist_id = mobj.group('id')
	1539	webpage = self._download_webpage(
	1540	url, playlist_id, 'Downloading show webpage')
	1541	# There's one playlist for each season of the show
	1542	m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
	1543	self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
	1544	entries = [
	1545	self.url_result(
	1546	'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
	1547	for season in m_seasons
	1548	]
	1549	title = self._og_search_title(webpage, fatal=False)
	1550
	1551	return {
	1552	'_type': 'playlist',
	1553	'id': playlist_id,
	1554	'title': title,
	1555	'entries': entries,
	1556	}
	1557
	1558
	1559	class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
	1560	"""
	1561	Base class for extractors that fetch info from
	1562	http://www.youtube.com/feed_ajax
	1563	Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
	1564	"""
	1565	_LOGIN_REQUIRED = True
	1566	# use action_load_personal_feed instead of action_load_system_feed
	1567	_PERSONAL_FEED = False
	1568
	1569	@property
	1570	def _FEED_TEMPLATE(self):
	1571	action = 'action_load_system_feed'
	1572	if self._PERSONAL_FEED:
	1573	action = 'action_load_personal_feed'
	1574	return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)
	1575
	1576	@property
	1577	def IE_NAME(self):
	1578	return 'youtube:%s' % self._FEED_NAME
	1579
	1580	def _real_initialize(self):
	1581	self._login()
	1582
	1583	def _real_extract(self, url):
	1584	feed_entries = []
	1585	paging = 0
	1586	for i in itertools.count(1):
	1587	info = self._download_json(
	1588	self._FEED_TEMPLATE % paging,
	1589	'%s feed' % self._FEED_NAME,
	1590	'Downloading page %s' % i,
	1591	transform_source=uppercase_escape)
	1592	feed_html = info.get('feed_html') or info.get('content_html')
	1593	load_more_widget_html = info.get('load_more_widget_html') or feed_html
	1594	m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
	1595	ids = orderedSet(m.group(1) for m in m_ids)
	1596	feed_entries.extend(
	1597	self.url_result(video_id, 'Youtube', video_id=video_id)
	1598	for video_id in ids)
	1599	mobj = re.search(
	1600	r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
	1601	load_more_widget_html)
	1602	if mobj is None:
	1603	break
	1604	paging = mobj.group('paging')
	1605	return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
	1606
	1607
	1608	class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
	1609	IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
	1610	_VALID_URL = r'https?://www\.youtube\.com/feed/recommended\|:ytrec(?:ommended)?'
	1611	_FEED_NAME = 'recommended'
	1612	_PLAYLIST_TITLE = 'Youtube Recommended videos'
	1613
	1614
	1615	class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
	1616	IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
	1617	_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later\|:ytwatchlater'
	1618	_FEED_NAME = 'watch_later'
	1619	_PLAYLIST_TITLE = 'Youtube Watch Later'
	1620	_PERSONAL_FEED = True
	1621
	1622
	1623	class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
	1624	IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
	1625	_VALID_URL = 'https?://www\.youtube\.com/feed/history\|:ythistory'
	1626	_FEED_NAME = 'history'
	1627	_PERSONAL_FEED = True
	1628	_PLAYLIST_TITLE = 'Youtube Watch History'
	1629
	1630
	1631	class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
	1632	IE_NAME = 'youtube:favorites'
	1633	IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
	1634	_VALID_URL = r'https?://www\.youtube\.com/my_favorites\|:ytfav(?:ou?rites)?'
	1635	_LOGIN_REQUIRED = True
	1636
	1637	def _real_extract(self, url):
	1638	webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
	1639	playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
	1640	return self.url_result(playlist_id, 'YoutubePlaylist')
	1641
	1642
	1643	class YoutubeSubscriptionsIE(YoutubePlaylistIE):
	1644	IE_NAME = 'youtube:subscriptions'
	1645	IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
	1646	_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions\|:ytsubs(?:criptions)?'
	1647	_TESTS = []
	1648
	1649	def _real_extract(self, url):
	1650	title = 'Youtube Subscriptions'
	1651	page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title)
	1652
	1653	# The extraction process is the same as for playlists, but the regex
	1654	# for the video ids doesn't contain an index
	1655	ids = []
	1656	more_widget_html = content_html = page
	1657
	1658	for page_num in itertools.count(1):
	1659	matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
	1660	new_ids = orderedSet(matches)
	1661	ids.extend(new_ids)
	1662
	1663	mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
	1664	if not mobj:
	1665	break
	1666
	1667	more = self._download_json(
	1668	'https://youtube.com/%s' % mobj.group('more'), title,
	1669	'Downloading page #%s' % page_num,
	1670	transform_source=uppercase_escape)
	1671	content_html = more['content_html']
	1672	more_widget_html = more['load_more_widget_html']
	1673
	1674	return {
	1675	'_type': 'playlist',
	1676	'title': title,
	1677	'entries': self._ids_to_results(ids),
	1678	}
	1679
	1680
	1681	class YoutubeTruncatedURLIE(InfoExtractor):
	1682	IE_NAME = 'youtube:truncated_url'
	1683	IE_DESC = False # Do not list
	1684	_VALID_URL = r'''(?x)
	1685	(?:https?://)?
	1686	(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
	1687	(?:watch\?(?:
	1688	feature=[a-z_]+\|
	1689	annotation_id=annotation_[^&]+\|
	1690	x-yt-cl=[0-9]+\|
	1691	)?
	1692	\|
	1693	attribution_link\?a=[^&]+
	1694	)
	1695	$
	1696	'''
	1697
	1698	_TESTS = [{
	1699	'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
	1700	'only_matching': True,
	1701	}, {
	1702	'url': 'http://www.youtube.com/watch?',
	1703	'only_matching': True,
	1704	}, {
	1705	'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
	1706	'only_matching': True,
	1707	}, {
	1708	'url': 'https://www.youtube.com/watch?feature=foo',
	1709	'only_matching': True,
	1710	}]
	1711
	1712	def _real_extract(self, url):
	1713	raise ExtractorError(
	1714	'Did you forget to quote the URL? Remember that & is a meta '
	1715	'character in most shells, so you want to put the URL in quotes, '
	1716	'like youtube-dl '
	1717	'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
	1718	' or simply youtube-dl BaW_jenozKc .',
	1719	expected=True)
	1720
	1721
	1722	class YoutubeTruncatedIDIE(InfoExtractor):
	1723	IE_NAME = 'youtube:truncated_id'
	1724	IE_DESC = False # Do not list
	1725	_VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
	1726
	1727	_TESTS = [{
	1728	'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
	1729	'only_matching': True,
	1730	}]
	1731
	1732	def _real_extract(self, url):
	1733	video_id = self._match_id(url)
	1734	raise ExtractorError(
	1735	'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
	1736	expected=True)