# coding: utf-8
# Source file: youtube_dl/extractor/youtube.py
# Captured from a gitweb blame view of the yt-dlp.git mirror (jfr.im);
# last commit shown: "[youtube:search_url] Fix extraction (Closes #6578)".
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5
6import itertools
7import json
8import os.path
9import re
10import time
11import traceback
12
13from .common import InfoExtractor, SearchInfoExtractor
14from ..jsinterp import JSInterpreter
15from ..swfinterp import SWFInterpreter
16from ..compat import (
17 compat_chr,
18 compat_parse_qs,
19 compat_urllib_parse,
20 compat_urllib_parse_unquote,
21 compat_urllib_parse_unquote_plus,
22 compat_urllib_parse_urlparse,
23 compat_urllib_request,
24 compat_urlparse,
25 compat_str,
26)
27from ..utils import (
28 clean_html,
29 ExtractorError,
30 float_or_none,
31 get_element_by_attribute,
32 get_element_by_id,
33 int_or_none,
34 orderedSet,
35 parse_duration,
36 remove_start,
37 smuggle_url,
38 str_to_int,
39 unescapeHTML,
40 unified_strdate,
41 unsmuggle_url,
42 uppercase_escape,
43 ISO3166Utils,
44)
45
46
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        # Force the English interface (hl=en) and the maximum number of
        # search results (f1=50000000) via YouTube's PREF cookie.
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        # Wrap bare video IDs into url_result dicts that get re-dispatched
        # to the YoutubeIE extractor.
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # BUGFIX: this path previously did a bare `return` (None); the
            # documented contract is that a failed login returns False.
            return False

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, 'Login GALX parameter')

        # Log in
        login_form_strs = {
            'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            'Email': username,
            'GALX': galx,
            'Passwd': password,

            'PersistentCookie': 'yes',
            '_utf8': '霱',
            'bgresponse': 'js_disabled',
            'checkConnection': '',
            'checkedDomains': 'youtube',
            'dnConn': '',
            'pstMsg': '0',
            'rmShown': '1',
            'secTok': '',
            'signIn': 'Sign in',
            'timeStmp': '',
            'service': 'youtube',
            'uilel': '3',
            'hl': 'en_US',
        }

        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')

        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note='Logging in', errnote='unable to log in', fatal=False)
        if login_results is False:
            return False

        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

        # Two-Factor
        # TODO add SMS and phone call support - these require making a request and then prompting the user

        if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                self._downloader.report_warning(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            # Users often paste the code with Google's "G-" prefix; strip it.
            tfa_code = remove_start(tfa_code, 'G-')

            tfa_form_strs = self._form_hidden_inputs('challenge', login_results)

            tfa_form_strs.update({
                'Pin': tfa_code,
                'TrustDevice': 'on',
            })

            # Same UTF-8-before-urlencode dance as for the main login form.
            tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
            tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')

            tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                tfa_req, None,
                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)

            if tfa_results is False:
                return False

            if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
                self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
                return False
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
                self._downloader.report_warning('unable to log in - did the page structure change?')
                return False
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                return False

        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_initialize(self):
        # Without a downloader there is nothing to configure.
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
183
184class YoutubeIE(YoutubeBaseInfoExtractor):
185 IE_DESC = 'YouTube.com'
186 _VALID_URL = r"""(?x)^
187 (
188 (?:https?://|//) # http(s):// or protocol-independent URL
189 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
190 (?:www\.)?deturl\.com/www\.youtube\.com/|
191 (?:www\.)?pwnyoutube\.com/|
192 (?:www\.)?yourepeat\.com/|
193 tube\.majestyc\.net/|
194 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
195 (?:.*?\#/)? # handle anchor (#/) redirect urls
196 (?: # the various things that can precede the ID:
197 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
198 |(?: # or the v= param in all its forms
199 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
200 (?:\?|\#!?) # the params delimiter ? or # or #!
201 (?:.*?&)?? # any other preceding param (like /?s=tuff&v=xxxx)
202 v=
203 )
204 ))
205 |youtu\.be/ # just youtu.be/xxxx
206 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
207 )
208 )? # all until now is optional -> you can pass the naked ID
209 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
210 (?!.*?&list=) # combined list/video URLs are handled by the playlist IE
211 (?(1).+)? # if we found the ID, everything can follow
212 $"""
213 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
214 _formats = {
215 '5': {'ext': 'flv', 'width': 400, 'height': 240},
216 '6': {'ext': 'flv', 'width': 450, 'height': 270},
217 '13': {'ext': '3gp'},
218 '17': {'ext': '3gp', 'width': 176, 'height': 144},
219 '18': {'ext': 'mp4', 'width': 640, 'height': 360},
220 '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
221 '34': {'ext': 'flv', 'width': 640, 'height': 360},
222 '35': {'ext': 'flv', 'width': 854, 'height': 480},
223 '36': {'ext': '3gp', 'width': 320, 'height': 240},
224 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
225 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
226 '43': {'ext': 'webm', 'width': 640, 'height': 360},
227 '44': {'ext': 'webm', 'width': 854, 'height': 480},
228 '45': {'ext': 'webm', 'width': 1280, 'height': 720},
229 '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
230 '59': {'ext': 'mp4', 'width': 854, 'height': 480},
231 '78': {'ext': 'mp4', 'width': 854, 'height': 480},
232
233
234 # 3d videos
235 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
236 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
237 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
238 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
239 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
240 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
241 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
242
243 # Apple HTTP Live Streaming
244 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
245 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
246 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
247 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
248 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
249 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
250 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
251
252 # DASH mp4 video
253 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
254 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
255 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
256 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
257 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
258 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
259 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
260 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
261 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
262 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
263 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
264
265 # Dash mp4 audio
266 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
267 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
268 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
269
270 # Dash webm
271 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
272 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
273 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
274 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
275 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
276 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
277 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
278 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
279 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
280 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
281 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
282 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
283 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
284 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
285 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
286 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
287 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
288 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
289 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
290 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
291 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
292
293 # Dash webm audio
294 '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
295 '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
296
297 # Dash webm audio with opus inside
298 '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
299 '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
300 '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
301
302 # RTMP (unnamed)
303 '_rtmp': {'protocol': 'rtmp'},
304 }
305
306 IE_NAME = 'youtube'
307 _TESTS = [
308 {
309 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
310 'info_dict': {
311 'id': 'BaW_jenozKc',
312 'ext': 'mp4',
313 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
314 'uploader': 'Philipp Hagemeister',
315 'uploader_id': 'phihag',
316 'upload_date': '20121002',
317 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
318 'categories': ['Science & Technology'],
319 'tags': ['youtube-dl'],
320 'like_count': int,
321 'dislike_count': int,
322 'start_time': 1,
323 'end_time': 9,
324 }
325 },
326 {
327 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
328 'note': 'Test generic use_cipher_signature video (#897)',
329 'info_dict': {
330 'id': 'UxxajLWwzqY',
331 'ext': 'mp4',
332 'upload_date': '20120506',
333 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
334 'description': 'md5:782e8651347686cba06e58f71ab51773',
335 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
336 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
337 'iconic ep', 'iconic', 'love', 'it'],
338 'uploader': 'Icona Pop',
339 'uploader_id': 'IconaPop',
340 }
341 },
342 {
343 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
344 'note': 'Test VEVO video with age protection (#956)',
345 'info_dict': {
346 'id': '07FYdnEawAQ',
347 'ext': 'mp4',
348 'upload_date': '20130703',
349 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
350 'description': 'md5:64249768eec3bc4276236606ea996373',
351 'uploader': 'justintimberlakeVEVO',
352 'uploader_id': 'justintimberlakeVEVO',
353 'age_limit': 18,
354 }
355 },
356 {
357 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
358 'note': 'Embed-only video (#1746)',
359 'info_dict': {
360 'id': 'yZIXLfi8CZQ',
361 'ext': 'mp4',
362 'upload_date': '20120608',
363 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
364 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
365 'uploader': 'SET India',
366 'uploader_id': 'setindia'
367 }
368 },
369 {
370 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
371 'note': 'Use the first video ID in the URL',
372 'info_dict': {
373 'id': 'BaW_jenozKc',
374 'ext': 'mp4',
375 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
376 'uploader': 'Philipp Hagemeister',
377 'uploader_id': 'phihag',
378 'upload_date': '20121002',
379 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
380 'categories': ['Science & Technology'],
381 'tags': ['youtube-dl'],
382 'like_count': int,
383 'dislike_count': int,
384 },
385 'params': {
386 'skip_download': True,
387 },
388 },
389 {
390 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
391 'note': '256k DASH audio (format 141) via DASH manifest',
392 'info_dict': {
393 'id': 'a9LDPn-MO4I',
394 'ext': 'm4a',
395 'upload_date': '20121002',
396 'uploader_id': '8KVIDEO',
397 'description': '',
398 'uploader': '8KVIDEO',
399 'title': 'UHDTV TEST 8K VIDEO.mp4'
400 },
401 'params': {
402 'youtube_include_dash_manifest': True,
403 'format': '141',
404 },
405 },
406 # DASH manifest with encrypted signature
407 {
408 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
409 'info_dict': {
410 'id': 'IB3lcPjvWLA',
411 'ext': 'm4a',
412 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
413 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
414 'uploader': 'AfrojackVEVO',
415 'uploader_id': 'AfrojackVEVO',
416 'upload_date': '20131011',
417 },
418 'params': {
419 'youtube_include_dash_manifest': True,
420 'format': '141',
421 },
422 },
423 # JS player signature function name containing $
424 {
425 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
426 'info_dict': {
427 'id': 'nfWlot6h_JM',
428 'ext': 'm4a',
429 'title': 'Taylor Swift - Shake It Off',
430 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
431 'uploader': 'TaylorSwiftVEVO',
432 'uploader_id': 'TaylorSwiftVEVO',
433 'upload_date': '20140818',
434 },
435 'params': {
436 'youtube_include_dash_manifest': True,
437 'format': '141',
438 },
439 },
440 # Controversy video
441 {
442 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
443 'info_dict': {
444 'id': 'T4XJQO3qol8',
445 'ext': 'mp4',
446 'upload_date': '20100909',
447 'uploader': 'The Amazing Atheist',
448 'uploader_id': 'TheAmazingAtheist',
449 'title': 'Burning Everyone\'s Koran',
450 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
451 }
452 },
453 # Normal age-gate video (No vevo, embed allowed)
454 {
455 'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
456 'info_dict': {
457 'id': 'HtVdAasjOgU',
458 'ext': 'mp4',
459 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
460 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
461 'uploader': 'The Witcher',
462 'uploader_id': 'WitcherGame',
463 'upload_date': '20140605',
464 'age_limit': 18,
465 },
466 },
467 # Age-gate video with encrypted signature
468 {
469 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
470 'info_dict': {
471 'id': '6kLq3WMV1nU',
472 'ext': 'mp4',
473 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
474 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
475 'uploader': 'LloydVEVO',
476 'uploader_id': 'LloydVEVO',
477 'upload_date': '20110629',
478 'age_limit': 18,
479 },
480 },
481 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
482 {
483 'url': '__2ABJjxzNo',
484 'info_dict': {
485 'id': '__2ABJjxzNo',
486 'ext': 'mp4',
487 'upload_date': '20100430',
488 'uploader_id': 'deadmau5',
489 'description': 'md5:12c56784b8032162bb936a5f76d55360',
490 'uploader': 'deadmau5',
491 'title': 'Deadmau5 - Some Chords (HD)',
492 },
493 'expected_warnings': [
494 'DASH manifest missing',
495 ]
496 },
497 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
498 {
499 'url': 'lqQg6PlCWgI',
500 'info_dict': {
501 'id': 'lqQg6PlCWgI',
502 'ext': 'mp4',
503 'upload_date': '20120724',
504 'uploader_id': 'olympic',
505 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
506 'uploader': 'Olympics',
507 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
508 },
509 'params': {
510 'skip_download': 'requires avconv',
511 }
512 },
513 # Non-square pixels
514 {
515 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
516 'info_dict': {
517 'id': '_b-2C3KPAM0',
518 'ext': 'mp4',
519 'stretched_ratio': 16 / 9.,
520 'upload_date': '20110310',
521 'uploader_id': 'AllenMeow',
522 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
523 'uploader': '孫艾倫',
524 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
525 },
526 },
527 # url_encoded_fmt_stream_map is empty string
528 {
529 'url': 'qEJwOuvDf7I',
530 'info_dict': {
531 'id': 'qEJwOuvDf7I',
532 'ext': 'webm',
533 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
534 'description': '',
535 'upload_date': '20150404',
536 'uploader_id': 'spbelect',
537 'uploader': 'Наблюдатели Петербурга',
538 },
539 'params': {
540 'skip_download': 'requires avconv',
541 }
542 },
543 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
544 {
545 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
546 'info_dict': {
547 'id': 'FIl7x6_3R5Y',
548 'ext': 'mp4',
549 'title': 'md5:7b81415841e02ecd4313668cde88737a',
550 'description': 'md5:116377fd2963b81ec4ce64b542173306',
551 'upload_date': '20150625',
552 'uploader_id': 'dorappi2000',
553 'uploader': 'dorappi2000',
554 'formats': 'mincount:33',
555 },
556 },
557 # DASH manifest with segment_list
558 {
559 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
560 'md5': '8ce563a1d667b599d21064e982ab9e31',
561 'info_dict': {
562 'id': 'CsmdDsKjzN8',
563 'ext': 'mp4',
564 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
565 'uploader': 'Airtek',
566 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
567 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
568 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
569 },
570 'params': {
571 'youtube_include_dash_manifest': True,
572 'format': '135', # bestvideo
573 }
574 },
575 {
576 # Multifeed videos (multiple cameras), URL is for Main Camera
577 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
578 'info_dict': {
579 'id': 'jqWvoWXjCVs',
580 'title': 'teamPGP: Rocket League Noob Stream',
581 'description': 'md5:dc7872fb300e143831327f1bae3af010',
582 },
583 'playlist': [{
584 'info_dict': {
585 'id': 'jqWvoWXjCVs',
586 'ext': 'mp4',
587 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
588 'description': 'md5:dc7872fb300e143831327f1bae3af010',
589 'upload_date': '20150721',
590 'uploader': 'Beer Games Beer',
591 'uploader_id': 'beergamesbeer',
592 },
593 }, {
594 'info_dict': {
595 'id': '6h8e8xoXJzg',
596 'ext': 'mp4',
597 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
598 'description': 'md5:dc7872fb300e143831327f1bae3af010',
599 'upload_date': '20150721',
600 'uploader': 'Beer Games Beer',
601 'uploader_id': 'beergamesbeer',
602 },
603 }, {
604 'info_dict': {
605 'id': 'PUOgX5z9xZw',
606 'ext': 'mp4',
607 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
608 'description': 'md5:dc7872fb300e143831327f1bae3af010',
609 'upload_date': '20150721',
610 'uploader': 'Beer Games Beer',
611 'uploader_id': 'beergamesbeer',
612 },
613 }, {
614 'info_dict': {
615 'id': 'teuwxikvS5k',
616 'ext': 'mp4',
617 'title': 'teamPGP: Rocket League Noob Stream (zim)',
618 'description': 'md5:dc7872fb300e143831327f1bae3af010',
619 'upload_date': '20150721',
620 'uploader': 'Beer Games Beer',
621 'uploader_id': 'beergamesbeer',
622 },
623 }],
624 'params': {
625 'skip_download': True,
626 },
627 }
628 ]
629
    def __init__(self, *args, **kwargs):
        """Initialize the extractor and its per-instance signature cache."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Maps (player_url, signature cache id) -> deciphering callable so a
        # given player version is only downloaded and parsed once per run.
        self._player_cache = {}
633
    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self.to_screen('%s: Downloading video info webpage' % video_id)
637
    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self.to_screen('%s: Extracting video information' % video_id)
641
    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available."""
        # NOTE(review): the original docstring said "Report extracted video
        # URL.", which was a copy-paste error — the method only logs that a
        # format is unavailable.
        self.to_screen('%s: Format %s not available' % (video_id, format))
645
    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen('RTMP download detected')
649
650 def _signature_cache_id(self, example_sig):
651 """ Return a string representation of a signature """
652 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
653
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Download the player (JS or SWF) and build a signature-deciphering
        callable.

        Results are cached on disk keyed by player type, player id and the
        "shape" of the example signature (the lengths of its dot-separated
        parts), since the cipher only rearranges characters by position.
        """
        id_m = re.match(
            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
            player_url)
        if not id_m:
            raise ExtractorError('Cannot identify player %r' % player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id doubles as a cache file name, so it must not contain
        # path separators.
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # The cached spec is simply a list of source indices; replaying
            # it reproduces the permutation without re-parsing the player.
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Probe the extracted function with a test string of distinct
        # characters (chr(0), chr(1), ...) so the output directly reveals,
        # via ord(), which source index each output position came from.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
699
    def _print_sig_code(self, func, example_sig):
        """Print (for --youtube-print-sig-code) Python source equivalent to
        the extracted signature function, compressing index runs into slices."""
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a slice expression, omitting defaults (0 start,
                # open end, step 1) for compactness.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk consecutive index pairs, coalescing runs with a constant
            # step of +1/-1 into slice expressions; isolated indices are
            # emitted as single-element lookups.
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    # Run broke: flush the accumulated slice.
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush whatever the final index belongs to: a lone element or
            # the tail of an open run.
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Same distinct-characters probe as in _extract_signature_function:
        # ord() of each output character is its source index.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
738
739 def _parse_sig_js(self, jscode):
740 funcname = self._search_regex(
741 r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
742 'Initial JS player signature function name')
743
744 jsi = JSInterpreter(jscode)
745 initial_function = jsi.extract_function(funcname)
746 return lambda s: initial_function([s])
747
748 def _parse_sig_swf(self, file_contents):
749 swfi = SWFInterpreter(file_contents)
750 TARGET_CLASSNAME = 'SignatureDecipher'
751 searched_class = swfi.extract_class(TARGET_CLASSNAME)
752 initial_function = swfi.extract_function(searched_class, 'decipher')
753 return lambda s: initial_function([s])
754
755 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
756 """Turn the encrypted s field into a working signature"""
757
758 if player_url is None:
759 raise ExtractorError('Cannot decrypt signature without player_url')
760
761 if player_url.startswith('//'):
762 player_url = 'https:' + player_url
763 try:
764 player_id = (player_url, self._signature_cache_id(s))
765 if player_id not in self._player_cache:
766 func = self._extract_signature_function(
767 video_id, player_url, s
768 )
769 self._player_cache[player_id] = func
770 func = self._player_cache[player_id]
771 if self._downloader.params.get('youtube_print_sig_code'):
772 self._print_sig_code(func, s)
773 return func(s)
774 except Exception as e:
775 tb = traceback.format_exc()
776 raise ExtractorError(
777 'Signature extraction failed: ' + tb, cause=e)
778
779 def _get_subtitles(self, video_id, webpage):
780 try:
781 subs_doc = self._download_xml(
782 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
783 video_id, note=False)
784 except ExtractorError as err:
785 self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
786 return {}
787
788 sub_lang_list = {}
789 for track in subs_doc.findall('track'):
790 lang = track.attrib['lang_code']
791 if lang in sub_lang_list:
792 continue
793 sub_formats = []
794 for ext in ['sbv', 'vtt', 'srt']:
795 params = compat_urllib_parse.urlencode({
796 'lang': lang,
797 'v': video_id,
798 'fmt': ext,
799 'name': track.attrib['name'].encode('utf-8'),
800 })
801 sub_formats.append({
802 'url': 'https://www.youtube.com/api/timedtext?' + params,
803 'ext': ext,
804 })
805 sub_lang_list[lang] = sub_formats
806 if not sub_lang_list:
807 self._downloader.report_warning('video doesn\'t have subtitles')
808 return {}
809 return sub_lang_list
810
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        self.to_screen('%s: Looking for automatic captions' % video_id)
        # The caption base URL and timestamp live in the embedded player config.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if mobj is None:
            self._downloader.report_warning(err_msg)
            return {}
        player_config = json.loads(mobj.group(1))
        try:
            args = player_config['args']
            caption_url = args['ttsurl']
            timestamp = args['timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse.urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            caption_list = self._download_xml(list_url, video_id)
            # The first <track> is the ASR track in the video's original
            # language; translations are requested relative to it.
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')

            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                sub_formats = []
                for ext in ['sbv', 'vtt', 'srt']:
                    params = compat_urllib_parse.urlencode({
                        'lang': original_lang,
                        'tlang': sub_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    })
                    sub_formats.append({
                        'url': caption_url + '&' + params,
                        'ext': ext,
                    })
                sub_lang_list[sub_lang] = sub_formats
            return sub_lang_list
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
863
864 @classmethod
865 def extract_id(cls, url):
866 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
867 if mobj is None:
868 raise ExtractorError('Invalid URL: %s' % url)
869 video_id = mobj.group(2)
870 return video_id
871
872 def _extract_from_m3u8(self, manifest_url, video_id):
873 url_map = {}
874
875 def _get_urls(_manifest):
876 lines = _manifest.split('\n')
877 urls = filter(lambda l: l and not l.startswith('#'),
878 lines)
879 return urls
880 manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
881 formats_urls = _get_urls(manifest)
882 for format_url in formats_urls:
883 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
884 url_map[itag] = format_url
885 return url_map
886
887 def _extract_annotations(self, video_id):
888 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
889 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
890
    def _parse_dash_manifest(
            self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
        """Download a DASH MPD and return a list of format dicts.

        Encrypted '/s/<sig>' path components in the manifest URL are decrypted
        in place before the download. Returns [] when the manifest cannot be
        fetched and fatal is False.
        """
        def decrypt_sig(mobj):
            s = mobj.group(1)
            dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
            return '/signature/%s' % dec_s
        dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
        dash_doc = self._download_xml(
            dash_manifest_url, video_id,
            note='Downloading DASH manifest',
            errnote='Could not download DASH manifest',
            fatal=fatal)

        # _download_xml returns False (not None) on a non-fatal failure
        if dash_doc is False:
            return []

        formats = []
        for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
            mime_type = a.attrib.get('mimeType')
            for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
                url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
                if url_el is None:
                    continue
                if mime_type == 'text/vtt':
                    # TODO implement WebVTT downloading
                    pass
                elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
                    segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
                    format_id = r.attrib['id']
                    video_url = url_el.text
                    # filesize comes from a YouTube-specific attribute, not DASH proper
                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
                    f = {
                        'format_id': format_id,
                        'url': video_url,
                        'width': int_or_none(r.attrib.get('width')),
                        'height': int_or_none(r.attrib.get('height')),
                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
                        'filesize': filesize,
                        'fps': int_or_none(r.attrib.get('frameRate')),
                    }
                    if segment_list is not None:
                        f.update({
                            'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
                            'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
                            'protocol': 'http_dash_segments',
                        })
                    # Merge with a previously seen Representation of the same
                    # format_id rather than emitting a duplicate entry.
                    try:
                        existing_format = next(
                            fo for fo in formats
                            if fo['format_id'] == format_id)
                    except StopIteration:
                        full_info = self._formats.get(format_id, {}).copy()
                        full_info.update(f)
                        codecs = r.attrib.get('codecs')
                        if codecs:
                            # The static itag table says which side is 'none';
                            # the manifest's codecs string fills in the other.
                            if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
                                full_info['vcodec'] = codecs
                            elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
                                full_info['acodec'] = codecs
                        formats.append(full_info)
                    else:
                        existing_format.update(f)
                else:
                    self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
        return formats
957
958 def _real_extract(self, url):
959 url, smuggled_data = unsmuggle_url(url, {})
960
961 proto = (
962 'http' if self._downloader.params.get('prefer_insecure', False)
963 else 'https')
964
965 start_time = None
966 end_time = None
967 parsed_url = compat_urllib_parse_urlparse(url)
968 for component in [parsed_url.fragment, parsed_url.query]:
969 query = compat_parse_qs(component)
970 if start_time is None and 't' in query:
971 start_time = parse_duration(query['t'][0])
972 if start_time is None and 'start' in query:
973 start_time = parse_duration(query['start'][0])
974 if end_time is None and 'end' in query:
975 end_time = parse_duration(query['end'][0])
976
977 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
978 mobj = re.search(self._NEXT_URL_RE, url)
979 if mobj:
980 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
981 video_id = self.extract_id(url)
982
983 # Get video webpage
984 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
985 video_webpage = self._download_webpage(url, video_id)
986
987 # Attempt to extract SWF player URL
988 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
989 if mobj is not None:
990 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
991 else:
992 player_url = None
993
994 dash_mpds = []
995
996 def add_dash_mpd(video_info):
997 dash_mpd = video_info.get('dashmpd')
998 if dash_mpd and dash_mpd[0] not in dash_mpds:
999 dash_mpds.append(dash_mpd[0])
1000
1001 # Get video info
1002 embed_webpage = None
1003 is_live = None
1004 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1005 age_gate = True
1006 # We simulate the access to the video from www.youtube.com/v/{video_id}
1007 # this can be viewed without login into Youtube
1008 url = proto + '://www.youtube.com/embed/%s' % video_id
1009 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1010 data = compat_urllib_parse.urlencode({
1011 'video_id': video_id,
1012 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1013 'sts': self._search_regex(
1014 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1015 })
1016 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1017 video_info_webpage = self._download_webpage(
1018 video_info_url, video_id,
1019 note='Refetching age-gated info webpage',
1020 errnote='unable to download video info webpage')
1021 video_info = compat_parse_qs(video_info_webpage)
1022 add_dash_mpd(video_info)
1023 else:
1024 age_gate = False
1025 video_info = None
1026 # Try looking directly into the video webpage
1027 mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
1028 if mobj:
1029 json_code = uppercase_escape(mobj.group(1))
1030 ytplayer_config = json.loads(json_code)
1031 args = ytplayer_config['args']
1032 if args.get('url_encoded_fmt_stream_map'):
1033 # Convert to the same format returned by compat_parse_qs
1034 video_info = dict((k, [v]) for k, v in args.items())
1035 add_dash_mpd(video_info)
1036 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1037 is_live = True
1038 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1039 # We also try looking in get_video_info since it may contain different dashmpd
1040 # URL that points to a DASH manifest with possibly different itag set (some itags
1041 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1042 # manifest pointed by get_video_info's dashmpd).
1043 # The general idea is to take a union of itags of both DASH manifests (for example
1044 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1045 self.report_video_info_webpage_download(video_id)
1046 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
1047 video_info_url = (
1048 '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1049 % (proto, video_id, el_type))
1050 video_info_webpage = self._download_webpage(
1051 video_info_url,
1052 video_id, note=False,
1053 errnote='unable to download video info webpage')
1054 get_video_info = compat_parse_qs(video_info_webpage)
1055 if get_video_info.get('use_cipher_signature') != ['True']:
1056 add_dash_mpd(get_video_info)
1057 if not video_info:
1058 video_info = get_video_info
1059 if 'token' in get_video_info:
1060 break
1061 if 'token' not in video_info:
1062 if 'reason' in video_info:
1063 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1064 regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
1065 if regions_allowed:
1066 raise ExtractorError('YouTube said: This video is available in %s only' % (
1067 ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1068 expected=True)
1069 raise ExtractorError(
1070 'YouTube said: %s' % video_info['reason'][0],
1071 expected=True, video_id=video_id)
1072 else:
1073 raise ExtractorError(
1074 '"token" parameter not in video info for unknown reason',
1075 video_id=video_id)
1076
1077 # title
1078 if 'title' in video_info:
1079 video_title = video_info['title'][0]
1080 else:
1081 self._downloader.report_warning('Unable to extract video title')
1082 video_title = '_'
1083
1084 # description
1085 video_description = get_element_by_id("eow-description", video_webpage)
1086 if video_description:
1087 video_description = re.sub(r'''(?x)
1088 <a\s+
1089 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1090 title="([^"]+)"\s+
1091 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1092 class="yt-uix-redirect-link"\s*>
1093 [^<]+
1094 </a>
1095 ''', r'\1', video_description)
1096 video_description = clean_html(video_description)
1097 else:
1098 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1099 if fd_mobj:
1100 video_description = unescapeHTML(fd_mobj.group(1))
1101 else:
1102 video_description = ''
1103
1104 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1105 if not self._downloader.params.get('noplaylist'):
1106 entries = []
1107 feed_ids = []
1108 multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
1109 for feed in multifeed_metadata_list.split(','):
1110 feed_data = compat_parse_qs(feed)
1111 entries.append({
1112 '_type': 'url_transparent',
1113 'ie_key': 'Youtube',
1114 'url': smuggle_url(
1115 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1116 {'force_singlefeed': True}),
1117 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1118 })
1119 feed_ids.append(feed_data['id'][0])
1120 self.to_screen(
1121 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1122 % (', '.join(feed_ids), video_id))
1123 return self.playlist_result(entries, video_id, video_title, video_description)
1124 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1125
1126 if 'view_count' in video_info:
1127 view_count = int(video_info['view_count'][0])
1128 else:
1129 view_count = None
1130
1131 # Check for "rental" videos
1132 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1133 raise ExtractorError('"rental" videos not supported')
1134
1135 # Start extracting information
1136 self.report_information_extraction(video_id)
1137
1138 # uploader
1139 if 'author' not in video_info:
1140 raise ExtractorError('Unable to extract uploader name')
1141 video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
1142
1143 # uploader_id
1144 video_uploader_id = None
1145 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1146 if mobj is not None:
1147 video_uploader_id = mobj.group(1)
1148 else:
1149 self._downloader.report_warning('unable to extract uploader nickname')
1150
1151 # thumbnail image
1152 # We try first to get a high quality image:
1153 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1154 video_webpage, re.DOTALL)
1155 if m_thumb is not None:
1156 video_thumbnail = m_thumb.group(1)
1157 elif 'thumbnail_url' not in video_info:
1158 self._downloader.report_warning('unable to extract video thumbnail')
1159 video_thumbnail = None
1160 else: # don't panic if we can't find it
1161 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1162
1163 # upload date
1164 upload_date = self._html_search_meta(
1165 'datePublished', video_webpage, 'upload date', default=None)
1166 if not upload_date:
1167 upload_date = self._search_regex(
1168 [r'(?s)id="eow-date.*?>(.*?)</span>',
1169 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1170 video_webpage, 'upload date', default=None)
1171 if upload_date:
1172 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1173 upload_date = unified_strdate(upload_date)
1174
1175 m_cat_container = self._search_regex(
1176 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1177 video_webpage, 'categories', default=None)
1178 if m_cat_container:
1179 category = self._html_search_regex(
1180 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1181 default=None)
1182 video_categories = None if category is None else [category]
1183 else:
1184 video_categories = None
1185
1186 video_tags = [
1187 unescapeHTML(m.group('content'))
1188 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1189
1190 def _extract_count(count_name):
1191 return str_to_int(self._search_regex(
1192 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1193 % re.escape(count_name),
1194 video_webpage, count_name, default=None))
1195
1196 like_count = _extract_count('like')
1197 dislike_count = _extract_count('dislike')
1198
1199 # subtitles
1200 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1201 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1202
1203 if 'length_seconds' not in video_info:
1204 self._downloader.report_warning('unable to extract video duration')
1205 video_duration = None
1206 else:
1207 video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
1208
1209 # annotations
1210 video_annotations = None
1211 if self._downloader.params.get('writeannotations', False):
1212 video_annotations = self._extract_annotations(video_id)
1213
1214 def _map_to_format_list(urlmap):
1215 formats = []
1216 for itag, video_real_url in urlmap.items():
1217 dct = {
1218 'format_id': itag,
1219 'url': video_real_url,
1220 'player_url': player_url,
1221 }
1222 if itag in self._formats:
1223 dct.update(self._formats[itag])
1224 formats.append(dct)
1225 return formats
1226
1227 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1228 self.report_rtmp_download()
1229 formats = [{
1230 'format_id': '_rtmp',
1231 'protocol': 'rtmp',
1232 'url': video_info['conn'][0],
1233 'player_url': player_url,
1234 }]
1235 elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
1236 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1237 if 'rtmpe%3Dyes' in encoded_url_map:
1238 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1239 url_map = {}
1240 for url_data_str in encoded_url_map.split(','):
1241 url_data = compat_parse_qs(url_data_str)
1242 if 'itag' not in url_data or 'url' not in url_data:
1243 continue
1244 format_id = url_data['itag'][0]
1245 url = url_data['url'][0]
1246
1247 if 'sig' in url_data:
1248 url += '&signature=' + url_data['sig'][0]
1249 elif 's' in url_data:
1250 encrypted_sig = url_data['s'][0]
1251 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1252
1253 jsplayer_url_json = self._search_regex(
1254 ASSETS_RE,
1255 embed_webpage if age_gate else video_webpage,
1256 'JS player URL (1)', default=None)
1257 if not jsplayer_url_json and not age_gate:
1258 # We need the embed website after all
1259 if embed_webpage is None:
1260 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1261 embed_webpage = self._download_webpage(
1262 embed_url, video_id, 'Downloading embed webpage')
1263 jsplayer_url_json = self._search_regex(
1264 ASSETS_RE, embed_webpage, 'JS player URL')
1265
1266 player_url = json.loads(jsplayer_url_json)
1267 if player_url is None:
1268 player_url_json = self._search_regex(
1269 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1270 video_webpage, 'age gate player URL')
1271 player_url = json.loads(player_url_json)
1272
1273 if self._downloader.params.get('verbose'):
1274 if player_url is None:
1275 player_version = 'unknown'
1276 player_desc = 'unknown'
1277 else:
1278 if player_url.endswith('swf'):
1279 player_version = self._search_regex(
1280 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1281 'flash player', fatal=False)
1282 player_desc = 'flash player %s' % player_version
1283 else:
1284 player_version = self._search_regex(
1285 r'html5player-([^/]+?)(?:/html5player)?\.js',
1286 player_url,
1287 'html5 player', fatal=False)
1288 player_desc = 'html5 player %s' % player_version
1289
1290 parts_sizes = self._signature_cache_id(encrypted_sig)
1291 self.to_screen('{%s} signature length %s, %s' %
1292 (format_id, parts_sizes, player_desc))
1293
1294 signature = self._decrypt_signature(
1295 encrypted_sig, video_id, player_url, age_gate)
1296 url += '&signature=' + signature
1297 if 'ratebypass' not in url:
1298 url += '&ratebypass=yes'
1299 url_map[format_id] = url
1300 formats = _map_to_format_list(url_map)
1301 elif video_info.get('hlsvp'):
1302 manifest_url = video_info['hlsvp'][0]
1303 url_map = self._extract_from_m3u8(manifest_url, video_id)
1304 formats = _map_to_format_list(url_map)
1305 else:
1306 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1307
1308 # Look for the DASH manifest
1309 if self._downloader.params.get('youtube_include_dash_manifest', True):
1310 dash_mpd_fatal = True
1311 for dash_manifest_url in dash_mpds:
1312 dash_formats = {}
1313 try:
1314 for df in self._parse_dash_manifest(
1315 video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
1316 # Do not overwrite DASH format found in some previous DASH manifest
1317 if df['format_id'] not in dash_formats:
1318 dash_formats[df['format_id']] = df
1319 # Additional DASH manifests may end up in HTTP Error 403 therefore
1320 # allow them to fail without bug report message if we already have
1321 # some DASH manifest succeeded. This is temporary workaround to reduce
1322 # burst of bug reports until we figure out the reason and whether it
1323 # can be fixed at all.
1324 dash_mpd_fatal = False
1325 except (ExtractorError, KeyError) as e:
1326 self.report_warning(
1327 'Skipping DASH manifest: %r' % e, video_id)
1328 if dash_formats:
1329 # Remove the formats we found through non-DASH, they
1330 # contain less info and it can be wrong, because we use
1331 # fixed values (for example the resolution). See
1332 # https://github.com/rg3/youtube-dl/issues/5774 for an
1333 # example.
1334 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
1335 formats.extend(dash_formats.values())
1336
1337 # Check for malformed aspect ratio
1338 stretched_m = re.search(
1339 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1340 video_webpage)
1341 if stretched_m:
1342 ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
1343 for f in formats:
1344 if f.get('vcodec') != 'none':
1345 f['stretched_ratio'] = ratio
1346
1347 self._sort_formats(formats)
1348
1349 return {
1350 'id': video_id,
1351 'uploader': video_uploader,
1352 'uploader_id': video_uploader_id,
1353 'upload_date': upload_date,
1354 'title': video_title,
1355 'thumbnail': video_thumbnail,
1356 'description': video_description,
1357 'categories': video_categories,
1358 'tags': video_tags,
1359 'subtitles': video_subtitles,
1360 'automatic_captions': automatic_captions,
1361 'duration': video_duration,
1362 'age_limit': 18 if age_gate else 0,
1363 'annotations': video_annotations,
1364 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1365 'view_count': view_count,
1366 'like_count': like_count,
1367 'dislike_count': dislike_count,
1368 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
1369 'formats': formats,
1370 'is_live': is_live,
1371 'start_time': start_time,
1372 'end_time': end_time,
1373 }
1374
1375
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
    }, {
        'note': 'embedded',
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        # Fixed typo ('playlist_mincout'): the misspelled key was silently
        # ignored by the test harness, disabling this check.
        'playlist_mincount': 21,
    }]

    def _real_initialize(self):
        self._login()

    def _extract_mix(self, playlist_id):
        """Extract an auto-generated mix playlist.

        The mixes are generated from a single video; the id of the playlist
        is just 'RD' + video_id, so the watch page must be scraped instead of
        the regular playlist page.
        """
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)
        ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
            webpage))
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Extract a regular playlist by paging through its web pages."""
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
                raise ExtractorError(
                    'The playlist doesn\'t exist or is private, use --username or '
                    '--netrc to access it.',
                    expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        # Extract the video ids from the playlist pages
        def _entries():
            more_widget_html = content_html = page
            for page_num in itertools.count(1):
                matches = re.finditer(self._VIDEO_RE, content_html)
                # We remove the duplicates and the link with index 0
                # (it's not the first video of the playlist)
                new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
                for vid_id in new_ids:
                    yield self.url_result(vid_id, 'Youtube', video_id=vid_id)

                mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
                if not mobj:
                    break

                more = self._download_json(
                    'https://youtube.com/%s' % mobj.group('more'), playlist_id,
                    'Downloading page #%s' % page_num,
                    transform_source=uppercase_escape)
                content_html = more['content_html']
                if not content_html.strip():
                    # Some webpages show a "Load more" button but they don't
                    # have more videos
                    break
                more_widget_html = more['load_more_widget_html']

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
            page, 'title')

        return self.playlist_result(_entries(), playlist_id, playlist_title)

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith(('RD', 'UL')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        return self._extract_playlist(playlist_id)
1568
1569
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }]

    @staticmethod
    def extract_videos_from_page(page):
        """Return (video_id, title) pairs scraped from a channel page.

        Duplicate ids are collapsed; a later occurrence of an id may supply
        the title when the first occurrence lacked one.
        """
        ids_in_page = []
        titles_in_page = []
        for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
            try:
                idx = ids_in_page.index(video_id)
                if video_title and not titles_in_page[idx]:
                    titles_in_page[idx] = video_title
            except ValueError:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
        return zip(ids_in_page, titles_in_page)

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        url = self._TEMPLATE_URL % channel_id

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = self._html_search_meta(
            'channelId', channel_page, 'channel id', default=None)
        if not channel_playlist_id:
            channel_playlist_id = self._search_regex(
                r'data-channel-external-id="([^"]+)"',
                channel_page, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # 'UC...' channel id maps to the 'UU...' uploads playlist
            playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = [
                self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title)
                for video_id, video_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        # Regular channel: follow the "Load more" button page by page
        def _entries():
            more_widget_html = content_html = channel_page
            for pagenum in itertools.count(1):

                for video_id, video_title in self.extract_videos_from_page(content_html):
                    yield self.url_result(
                        video_id, 'Youtube', video_id=video_id,
                        video_title=video_title)

                mobj = re.search(
                    r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
                    more_widget_html)
                if not mobj:
                    break

                more = self._download_json(
                    'https://youtube.com/%s' % mobj.group('more'), channel_id,
                    'Downloading page #%s' % (pagenum + 1),
                    transform_source=uppercase_escape)
                content_html = more['content_html']
                more_widget_html = more['load_more_widget_html']

        return self.playlist_result(_entries(), channel_id)
1662
1663
class YoutubeUserIE(YoutubeChannelIE):
    """Extract all videos uploaded by a YouTube user.

    Accepts user page URLs and the "ytuser:" keyword; the listing itself
    is handled by YoutubeChannelIE through the user's /videos URL.
    """
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Don't claim the URL if any other YouTube extractor can handle it:
        # our _VALID_URL is too permissive and would match their URLs too.
        # (A generator expression is already an iterator; no iter() needed.)
        other_ies = (
            klass for name, klass in globals().items()
            if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)
1690
1691
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    """Search extractor for the "ytsearch" keyword."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string parameters; subclasses may override (e.g. sorting).
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        videos = []
        limit = n

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query.encode('utf-8'),
                'page': pagenum,
                # 'spf=navigate' makes YouTube answer with a JSON payload
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as we have enough results ('>=', not '>', so we
            # don't download one extra page when exactly n were collected)
            # or when the page yielded nothing new.
            if not new_videos or len(videos) >= limit:
                break

        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
1735
1736
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor that orders results newest-first ("ytsearchdate")."""
    _SEARCH_KEY = 'ytsearchdate'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # Ask YouTube to sort the result list by upload date.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
1742
1743
class YoutubeSearchURLIE(InfoExtractor):
    """Extract the result list of a plain YouTube search-results URL."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }]

    def _real_extract(self, url):
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))

        webpage = self._download_webpage(url, query)
        result_code = self._search_regex(
            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')

        entries = []
        # Each result item is rendered as an <h3 class="...yt-lockup-title...">.
        for item_html in re.findall(
                r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>',
                result_code):
            # A missing title is tolerated; the link target is mandatory.
            item_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], item_html, 'item title', fatal=False)
            item_href = self._html_search_regex(
                r'(?s)href="([^"]+)"', item_html, 'item URL')
            entries.append({
                '_type': 'url',
                'url': compat_urlparse.urljoin('https://www.youtube.com/', item_href),
                'title': item_title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
1785
1786
class YoutubeShowIE(InfoExtractor):
    """Extract every season of a YouTube show as a playlist of playlists."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'http://www.youtube.com/show/airdisasters',
        'playlist_mincount': 3,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        playlist_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading show webpage')
        # There's one playlist for each season of the show
        seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen('%s: Found %s seasons' % (playlist_id, len(seasons)))
        entries = [
            self.url_result(
                'https://www.youtube.com' + season_match.group(1), 'YoutubePlaylist')
            for season_match in seasons
        ]
        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': self._og_search_title(webpage, fatal=False),
            'entries': entries,
        }
1821
1822
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            # NB: must be a real list, not filter(): on Python 3 filter()
            # returns a lazy iterator that is always truthy, so the
            # emptiness check below would never fire and this loop could
            # run forever on the 'recommended' feed.
            new_ids = [video_id for video_id in orderedSet(matches) if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            # No 'load more' widget means we reached the last portion.
            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
1870
1871
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extract the authenticated user's "Watch Later" list (playlist WL)."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'

    # The inherited playlist tests don't apply to this extractor.
    _TESTS = []

    def _real_extract(self, url):
        # The URL carries no extra information: the list id is always 'WL'.
        return self._extract_playlist('WL')
1881
1882
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the logged-in user's favourites page to its backing playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of the playlist that backs it.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_playlist = self._search_regex(r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(favourites_playlist, 'YoutubePlaylist')
1893
1894
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extract the authenticated user's 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
1900
1901
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extract the authenticated user's subscriptions feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
1907
1908
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Extract the authenticated user's watch-history feed."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string like every sibling extractor: in a plain string '\.' is an
    # invalid escape sequence (DeprecationWarning on newer Pythons); the
    # resulting regex text is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
1914
1915
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their video id (usually an unquoted '&'
    in the shell) and raise a helpful error instead of a generic one."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # NOTE: the empty branch after 'hl=[^&]*|' is deliberate — it makes the
    # whole query-parameter group optional so a bare 'watch?' also matches.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Extraction always fails on purpose: the URL cannot identify a video.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
1959
1960
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than the 11 characters a
    valid YouTube id always has, and report them as truncated."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Extraction always fails on purpose: the id is too short to resolve.
        truncated_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url),
            expected=True)