]> jfr.im git - yt-dlp.git/blame_incremental - youtube_dl/extractor/youtube.py
[youtube] Skip download for multiple v= test
[yt-dlp.git] / youtube_dl / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5
6import itertools
7import json
8import os.path
9import re
10import time
11import traceback
12
13from .common import InfoExtractor, SearchInfoExtractor
14from ..jsinterp import JSInterpreter
15from ..swfinterp import SWFInterpreter
16from ..compat import (
17 compat_chr,
18 compat_parse_qs,
19 compat_urllib_parse,
20 compat_urllib_parse_unquote,
21 compat_urllib_parse_unquote_plus,
22 compat_urllib_parse_urlparse,
23 compat_urllib_request,
24 compat_urlparse,
25 compat_str,
26)
27from ..utils import (
28 clean_html,
29 ExtractorError,
30 float_or_none,
31 get_element_by_attribute,
32 get_element_by_id,
33 int_or_none,
34 orderedSet,
35 parse_duration,
36 smuggle_url,
37 str_to_int,
38 unescapeHTML,
39 unified_strdate,
40 unsmuggle_url,
41 uppercase_escape,
42 ISO3166Utils,
43)
44
45
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        # Force English pages (hl=en) so regex-based extraction sees a
        # stable page layout regardless of the user's locale.
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Wrap plain video IDs into url_result dicts handled by YoutubeIE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Download failed (fatal=False); signal failure explicitly
            # instead of falling through with an implicit None.
            return False

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, 'Login GALX parameter')

        # Log in
        login_form_strs = {
            'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            'Email': username,
            'GALX': galx,
            'Passwd': password,

            'PersistentCookie': 'yes',
            '_utf8': '霱',
            'bgresponse': 'js_disabled',
            'checkConnection': '',
            'checkedDomains': 'youtube',
            'dnConn': '',
            'pstMsg': '0',
            'rmShown': '1',
            'secTok': '',
            'signIn': 'Sign in',
            'timeStmp': '',
            'service': 'youtube',
            'uilel': '3',
            'hl': 'en_US',
        }

        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')

        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note='Logging in', errnote='unable to log in', fatal=False)
        if login_results is False:
            return False

        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

        # Two-Factor
        # TODO add SMS and phone call support - these require making a request and then prompting the user

        if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
            tfa_code = self._get_tfa_info()

            if tfa_code is None:
                self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
                self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            # Unlike the first login form, secTok and timeStmp are both required for the TFA form

            match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
            if match is None:
                self._downloader.report_warning('Failed to get secTok - did the page structure change?')
                # Bail out instead of crashing on match.group(1) below.
                return False
            secTok = match.group(1)
            match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
            if match is None:
                self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
                # Same: a missing field means the TFA form cannot be built.
                return False
            timeStmp = match.group(1)

            tfa_form_strs = {
                'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                'smsToken': '',
                'smsUserPin': tfa_code,
                'smsVerifyPin': 'Verify',

                'PersistentCookie': 'yes',
                'checkConnection': '',
                'checkedDomains': 'youtube',
                'pstMsg': '1',
                'secTok': secTok,
                'timeStmp': timeStmp,
                'service': 'youtube',
                'hl': 'en_US',
            }
            tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
            tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')

            tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                tfa_req, None,
                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)

            if tfa_results is False:
                return False

            if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
                self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
                return False
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
                self._downloader.report_warning('unable to log in - did the page structure change?')
                return False
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                return False

        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_initialize(self):
        # Called once by InfoExtractor before the first extraction.
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
197
198
199class YoutubeIE(YoutubeBaseInfoExtractor):
200 IE_DESC = 'YouTube.com'
201 _VALID_URL = r"""(?x)^
202 (
203 (?:https?://|//) # http(s):// or protocol-independent URL
204 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
205 (?:www\.)?deturl\.com/www\.youtube\.com/|
206 (?:www\.)?pwnyoutube\.com/|
207 (?:www\.)?yourepeat\.com/|
208 tube\.majestyc\.net/|
209 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
210 (?:.*?\#/)? # handle anchor (#/) redirect urls
211 (?: # the various things that can precede the ID:
212 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
213 |(?: # or the v= param in all its forms
214 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
215 (?:\?|\#!?) # the params delimiter ? or # or #!
216 (?:.*?&)?? # any other preceding param (like /?s=tuff&v=xxxx)
217 v=
218 )
219 ))
220 |youtu\.be/ # just youtu.be/xxxx
221 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
222 )
223 )? # all until now is optional -> you can pass the naked ID
224 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
225 (?!.*?&list=) # combined list/video URLs are handled by the playlist IE
226 (?(1).+)? # if we found the ID, everything can follow
227 $"""
228 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
229 _formats = {
230 '5': {'ext': 'flv', 'width': 400, 'height': 240},
231 '6': {'ext': 'flv', 'width': 450, 'height': 270},
232 '13': {'ext': '3gp'},
233 '17': {'ext': '3gp', 'width': 176, 'height': 144},
234 '18': {'ext': 'mp4', 'width': 640, 'height': 360},
235 '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
236 '34': {'ext': 'flv', 'width': 640, 'height': 360},
237 '35': {'ext': 'flv', 'width': 854, 'height': 480},
238 '36': {'ext': '3gp', 'width': 320, 'height': 240},
239 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
240 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
241 '43': {'ext': 'webm', 'width': 640, 'height': 360},
242 '44': {'ext': 'webm', 'width': 854, 'height': 480},
243 '45': {'ext': 'webm', 'width': 1280, 'height': 720},
244 '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
245 '59': {'ext': 'mp4', 'width': 854, 'height': 480},
246 '78': {'ext': 'mp4', 'width': 854, 'height': 480},
247
248
249 # 3d videos
250 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
251 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
252 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
253 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
254 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
255 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
256 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
257
258 # Apple HTTP Live Streaming
259 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
260 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
261 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
262 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
263 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
264 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
265 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
266
267 # DASH mp4 video
268 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
269 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
270 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
271 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
272 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
273 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
274 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
275 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
276 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
277 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
278 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
279
280 # Dash mp4 audio
281 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
282 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
283 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
284
285 # Dash webm
286 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
287 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
288 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
289 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
290 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
291 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
292 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
293 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
294 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
295 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
296 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
297 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
298 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
299 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
300 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
301 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
302 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
303 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
304 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
305 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
306 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
307
308 # Dash webm audio
309 '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
310 '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
311
312 # Dash webm audio with opus inside
313 '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
314 '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
315 '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
316
317 # RTMP (unnamed)
318 '_rtmp': {'protocol': 'rtmp'},
319 }
320
321 IE_NAME = 'youtube'
322 _TESTS = [
323 {
324 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
325 'info_dict': {
326 'id': 'BaW_jenozKc',
327 'ext': 'mp4',
328 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
329 'uploader': 'Philipp Hagemeister',
330 'uploader_id': 'phihag',
331 'upload_date': '20121002',
332 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
333 'categories': ['Science & Technology'],
334 'tags': ['youtube-dl'],
335 'like_count': int,
336 'dislike_count': int,
337 'start_time': 1,
338 'end_time': 9,
339 }
340 },
341 {
342 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
343 'note': 'Test generic use_cipher_signature video (#897)',
344 'info_dict': {
345 'id': 'UxxajLWwzqY',
346 'ext': 'mp4',
347 'upload_date': '20120506',
348 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
349 'description': 'md5:782e8651347686cba06e58f71ab51773',
350 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
351 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
352 'iconic ep', 'iconic', 'love', 'it'],
353 'uploader': 'Icona Pop',
354 'uploader_id': 'IconaPop',
355 }
356 },
357 {
358 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
359 'note': 'Test VEVO video with age protection (#956)',
360 'info_dict': {
361 'id': '07FYdnEawAQ',
362 'ext': 'mp4',
363 'upload_date': '20130703',
364 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
365 'description': 'md5:64249768eec3bc4276236606ea996373',
366 'uploader': 'justintimberlakeVEVO',
367 'uploader_id': 'justintimberlakeVEVO',
368 }
369 },
370 {
371 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
372 'note': 'Embed-only video (#1746)',
373 'info_dict': {
374 'id': 'yZIXLfi8CZQ',
375 'ext': 'mp4',
376 'upload_date': '20120608',
377 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
378 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
379 'uploader': 'SET India',
380 'uploader_id': 'setindia'
381 }
382 },
383 {
384 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
385 'note': 'Use the first video ID in the URL',
386 'info_dict': {
387 'id': 'BaW_jenozKc',
388 'ext': 'mp4',
389 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
390 'uploader': 'Philipp Hagemeister',
391 'uploader_id': 'phihag',
392 'upload_date': '20121002',
393 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
394 'categories': ['Science & Technology'],
395 'tags': ['youtube-dl'],
396 'like_count': int,
397 'dislike_count': int,
398 },
399 'params': {
400 'skip_download': True,
401 },
402 },
403 {
404 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
405 'note': '256k DASH audio (format 141) via DASH manifest',
406 'info_dict': {
407 'id': 'a9LDPn-MO4I',
408 'ext': 'm4a',
409 'upload_date': '20121002',
410 'uploader_id': '8KVIDEO',
411 'description': '',
412 'uploader': '8KVIDEO',
413 'title': 'UHDTV TEST 8K VIDEO.mp4'
414 },
415 'params': {
416 'youtube_include_dash_manifest': True,
417 'format': '141',
418 },
419 },
420 # DASH manifest with encrypted signature
421 {
422 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
423 'info_dict': {
424 'id': 'IB3lcPjvWLA',
425 'ext': 'm4a',
426 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
427 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
428 'uploader': 'AfrojackVEVO',
429 'uploader_id': 'AfrojackVEVO',
430 'upload_date': '20131011',
431 },
432 'params': {
433 'youtube_include_dash_manifest': True,
434 'format': '141',
435 },
436 },
437 # JS player signature function name containing $
438 {
439 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
440 'info_dict': {
441 'id': 'nfWlot6h_JM',
442 'ext': 'm4a',
443 'title': 'Taylor Swift - Shake It Off',
444 'description': 'md5:2acfda1b285bdd478ccec22f9918199d',
445 'uploader': 'TaylorSwiftVEVO',
446 'uploader_id': 'TaylorSwiftVEVO',
447 'upload_date': '20140818',
448 },
449 'params': {
450 'youtube_include_dash_manifest': True,
451 'format': '141',
452 },
453 },
454 # Controversy video
455 {
456 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
457 'info_dict': {
458 'id': 'T4XJQO3qol8',
459 'ext': 'mp4',
460 'upload_date': '20100909',
461 'uploader': 'The Amazing Atheist',
462 'uploader_id': 'TheAmazingAtheist',
463 'title': 'Burning Everyone\'s Koran',
464 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
465 }
466 },
467 # Normal age-gate video (No vevo, embed allowed)
468 {
469 'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
470 'info_dict': {
471 'id': 'HtVdAasjOgU',
472 'ext': 'mp4',
473 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
474 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
475 'uploader': 'The Witcher',
476 'uploader_id': 'WitcherGame',
477 'upload_date': '20140605',
478 },
479 },
480 # Age-gate video with encrypted signature
481 {
482 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
483 'info_dict': {
484 'id': '6kLq3WMV1nU',
485 'ext': 'mp4',
486 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
487 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
488 'uploader': 'LloydVEVO',
489 'uploader_id': 'LloydVEVO',
490 'upload_date': '20110629',
491 },
492 },
493 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
494 {
495 'url': '__2ABJjxzNo',
496 'info_dict': {
497 'id': '__2ABJjxzNo',
498 'ext': 'mp4',
499 'upload_date': '20100430',
500 'uploader_id': 'deadmau5',
501 'description': 'md5:12c56784b8032162bb936a5f76d55360',
502 'uploader': 'deadmau5',
503 'title': 'Deadmau5 - Some Chords (HD)',
504 },
505 'expected_warnings': [
506 'DASH manifest missing',
507 ]
508 },
509 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
510 {
511 'url': 'lqQg6PlCWgI',
512 'info_dict': {
513 'id': 'lqQg6PlCWgI',
514 'ext': 'mp4',
515 'upload_date': '20120731',
516 'uploader_id': 'olympic',
517 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
518 'uploader': 'Olympics',
519 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
520 },
521 'params': {
522 'skip_download': 'requires avconv',
523 }
524 },
525 # Non-square pixels
526 {
527 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
528 'info_dict': {
529 'id': '_b-2C3KPAM0',
530 'ext': 'mp4',
531 'stretched_ratio': 16 / 9.,
532 'upload_date': '20110310',
533 'uploader_id': 'AllenMeow',
534 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
535 'uploader': '孫艾倫',
536 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
537 },
538 },
539 # url_encoded_fmt_stream_map is empty string
540 {
541 'url': 'qEJwOuvDf7I',
542 'info_dict': {
543 'id': 'qEJwOuvDf7I',
544 'ext': 'mp4',
545 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
546 'description': '',
547 'upload_date': '20150404',
548 'uploader_id': 'spbelect',
549 'uploader': 'Наблюдатели Петербурга',
550 },
551 'params': {
552 'skip_download': 'requires avconv',
553 }
554 },
555 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
556 {
557 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
558 'info_dict': {
559 'id': 'FIl7x6_3R5Y',
560 'ext': 'mp4',
561 'title': 'md5:7b81415841e02ecd4313668cde88737a',
562 'description': 'md5:116377fd2963b81ec4ce64b542173306',
563 'upload_date': '20150625',
564 'uploader_id': 'dorappi2000',
565 'uploader': 'dorappi2000',
566 'formats': 'mincount:33',
567 },
568 },
569 # DASH manifest with segment_list
570 {
571 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
572 'md5': '8ce563a1d667b599d21064e982ab9e31',
573 'info_dict': {
574 'id': 'CsmdDsKjzN8',
575 'ext': 'mp4',
576 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
577 'uploader': 'Airtek',
578 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
579 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
580 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
581 },
582 'params': {
583 'youtube_include_dash_manifest': True,
584 'format': '135', # bestvideo
585 }
586 },
587 {
588 # Multifeed videos (multiple cameras), URL is for Main Camera
589 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
590 'info_dict': {
591 'id': 'jqWvoWXjCVs',
592 'title': 'teamPGP: Rocket League Noob Stream',
593 'description': 'md5:dc7872fb300e143831327f1bae3af010',
594 },
595 'playlist': [{
596 'info_dict': {
597 'id': 'jqWvoWXjCVs',
598 'ext': 'mp4',
599 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
600 'description': 'md5:dc7872fb300e143831327f1bae3af010',
601 'upload_date': '20150721',
602 'uploader': 'Beer Games Beer',
603 'uploader_id': 'beergamesbeer',
604 },
605 }, {
606 'info_dict': {
607 'id': '6h8e8xoXJzg',
608 'ext': 'mp4',
609 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
610 'description': 'md5:dc7872fb300e143831327f1bae3af010',
611 'upload_date': '20150721',
612 'uploader': 'Beer Games Beer',
613 'uploader_id': 'beergamesbeer',
614 },
615 }, {
616 'info_dict': {
617 'id': 'PUOgX5z9xZw',
618 'ext': 'mp4',
619 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
620 'description': 'md5:dc7872fb300e143831327f1bae3af010',
621 'upload_date': '20150721',
622 'uploader': 'Beer Games Beer',
623 'uploader_id': 'beergamesbeer',
624 },
625 }, {
626 'info_dict': {
627 'id': 'teuwxikvS5k',
628 'ext': 'mp4',
629 'title': 'teamPGP: Rocket League Noob Stream (zim)',
630 'description': 'md5:dc7872fb300e143831327f1bae3af010',
631 'upload_date': '20150721',
632 'uploader': 'Beer Games Beer',
633 'uploader_id': 'beergamesbeer',
634 },
635 }],
636 'params': {
637 'skip_download': True,
638 },
639 }
640 ]
641
642 def __init__(self, *args, **kwargs):
643 super(YoutubeIE, self).__init__(*args, **kwargs)
644 self._player_cache = {}
645
646 def report_video_info_webpage_download(self, video_id):
647 """Report attempt to download video info webpage."""
648 self.to_screen('%s: Downloading video info webpage' % video_id)
649
650 def report_information_extraction(self, video_id):
651 """Report attempt to extract video information."""
652 self.to_screen('%s: Extracting video information' % video_id)
653
654 def report_unavailable_format(self, video_id, format):
655 """Report extracted video URL."""
656 self.to_screen('%s: Format %s not available' % (video_id, format))
657
658 def report_rtmp_download(self):
659 """Indicate the download will use the RTMP protocol."""
660 self.to_screen('RTMP download detected')
661
662 def _signature_cache_id(self, example_sig):
663 """ Return a string representation of a signature """
664 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
665
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Build (or load from the on-disk cache) the signature-decryption
        function for the player at player_url. example_sig contributes only
        its part-length pattern, which keys the cache entry."""
        id_m = re.match(
            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
            player_url)
        if not id_m:
            raise ExtractorError('Cannot identify player %r' % player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id doubles as a filename component; ensure it cannot escape
        # the cache directory.
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # cache_spec is a list of source indices: output char i is taken
            # from input position cache_spec[i].
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        # Dispatch on the player container: JS players are interpreted with
        # JSInterpreter, Flash players with SWFInterpreter.
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Run the extracted function on a probe string of distinct characters
        # so each output char reveals which input position it came from, then
        # persist that permutation spec for future runs.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
711
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the deciphering function *func*
        (used with the youtube_print_sig_code option, see below)."""
        def gen_sig_code(idxs):
            # Compress the index list into s[i] terms and s[a:b:c] slices.
            def _genslice(start, end, step):
                # Render one contiguous run of indices as a Python slice.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Currently inside a run: extend it or flush it.
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Two adjacent indices open a new run (step +1 or -1).
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element/run (i leaks out of the for loop above).
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Trace func on a string of distinct characters so each output char
        # reveals its source index.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
750
751 def _parse_sig_js(self, jscode):
752 funcname = self._search_regex(
753 r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
754 'Initial JS player signature function name')
755
756 jsi = JSInterpreter(jscode)
757 initial_function = jsi.extract_function(funcname)
758 return lambda s: initial_function([s])
759
760 def _parse_sig_swf(self, file_contents):
761 swfi = SWFInterpreter(file_contents)
762 TARGET_CLASSNAME = 'SignatureDecipher'
763 searched_class = swfi.extract_class(TARGET_CLASSNAME)
764 initial_function = swfi.extract_function(searched_class, 'decipher')
765 return lambda s: initial_function([s])
766
767 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
768 """Turn the encrypted s field into a working signature"""
769
770 if player_url is None:
771 raise ExtractorError('Cannot decrypt signature without player_url')
772
773 if player_url.startswith('//'):
774 player_url = 'https:' + player_url
775 try:
776 player_id = (player_url, self._signature_cache_id(s))
777 if player_id not in self._player_cache:
778 func = self._extract_signature_function(
779 video_id, player_url, s
780 )
781 self._player_cache[player_id] = func
782 func = self._player_cache[player_id]
783 if self._downloader.params.get('youtube_print_sig_code'):
784 self._print_sig_code(func, s)
785 return func(s)
786 except Exception as e:
787 tb = traceback.format_exc()
788 raise ExtractorError(
789 'Signature extraction failed: ' + tb, cause=e)
790
791 def _get_subtitles(self, video_id, webpage):
792 try:
793 subs_doc = self._download_xml(
794 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
795 video_id, note=False)
796 except ExtractorError as err:
797 self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
798 return {}
799
800 sub_lang_list = {}
801 for track in subs_doc.findall('track'):
802 lang = track.attrib['lang_code']
803 if lang in sub_lang_list:
804 continue
805 sub_formats = []
806 for ext in ['sbv', 'vtt', 'srt']:
807 params = compat_urllib_parse.urlencode({
808 'lang': lang,
809 'v': video_id,
810 'fmt': ext,
811 'name': track.attrib['name'].encode('utf-8'),
812 })
813 sub_formats.append({
814 'url': 'https://www.youtube.com/api/timedtext?' + params,
815 'ext': ext,
816 })
817 sub_lang_list[lang] = sub_formats
818 if not sub_lang_list:
819 self._downloader.report_warning('video doesn\'t have subtitles')
820 return {}
821 return sub_lang_list
822
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        self.to_screen('%s: Looking for automatic captions' % video_id)
        # The caption (tts) base URL lives inside the inline player config.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if mobj is None:
            self._downloader.report_warning(err_msg)
            return {}
        player_config = json.loads(mobj.group(1))
        try:
            args = player_config['args']
            # KeyError here (no ttsurl/timestamp) means no automatic captions;
            # it is caught by the except clause below.
            caption_url = args['ttsurl']
            timestamp = args['timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse.urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            caption_list = self._download_xml(list_url, video_id)
            # The first <track> node is the original (ASR) language.
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')

            # Each <target> node is a language the captions can be
            # machine-translated into; offer every supported format for it.
            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                sub_formats = []
                for ext in ['sbv', 'vtt', 'srt']:
                    params = compat_urllib_parse.urlencode({
                        'lang': original_lang,
                        'tlang': sub_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    })
                    sub_formats.append({
                        'url': caption_url + '&' + params,
                        'ext': ext,
                    })
                sub_lang_list[sub_lang] = sub_formats
            return sub_lang_list
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
875
876 @classmethod
877 def extract_id(cls, url):
878 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
879 if mobj is None:
880 raise ExtractorError('Invalid URL: %s' % url)
881 video_id = mobj.group(2)
882 return video_id
883
884 def _extract_from_m3u8(self, manifest_url, video_id):
885 url_map = {}
886
887 def _get_urls(_manifest):
888 lines = _manifest.split('\n')
889 urls = filter(lambda l: l and not l.startswith('#'),
890 lines)
891 return urls
892 manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
893 formats_urls = _get_urls(manifest)
894 for format_url in formats_urls:
895 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
896 url_map[itag] = format_url
897 return url_map
898
899 def _extract_annotations(self, video_id):
900 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
901 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
902
    def _parse_dash_manifest(
            self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
        """Download a DASH MPD and return a list of format dicts.

        Returns [] when the manifest download fails and *fatal* is False.
        """
        def decrypt_sig(mobj):
            # Replace an encrypted '/s/<sig>' path component of the manifest
            # URL with its decrypted '/signature/<sig>' form.
            s = mobj.group(1)
            dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
            return '/signature/%s' % dec_s
        dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
        dash_doc = self._download_xml(
            dash_manifest_url, video_id,
            note='Downloading DASH manifest',
            errnote='Could not download DASH manifest',
            fatal=fatal)

        if dash_doc is False:
            return []

        formats = []
        for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
            mime_type = a.attrib.get('mimeType')
            for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
                url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
                if url_el is None:
                    continue
                if mime_type == 'text/vtt':
                    # TODO implement WebVTT downloading
                    pass
                elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
                    segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
                    format_id = r.attrib['id']
                    video_url = url_el.text
                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
                    f = {
                        'format_id': format_id,
                        'url': video_url,
                        'width': int_or_none(r.attrib.get('width')),
                        'height': int_or_none(r.attrib.get('height')),
                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
                        'filesize': filesize,
                        'fps': int_or_none(r.attrib.get('frameRate')),
                    }
                    if segment_list is not None:
                        # Segmented DASH: expose the init segment and media
                        # segment URLs so the downloader can fetch piecewise.
                        f.update({
                            'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
                            'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
                            'protocol': 'http_dash_segments',
                        })
                    try:
                        existing_format = next(
                            fo for fo in formats
                            if fo['format_id'] == format_id)
                    except StopIteration:
                        # First time we see this itag: merge static format
                        # metadata (self._formats) with what the MPD carries.
                        full_info = self._formats.get(format_id, {}).copy()
                        full_info.update(f)
                        codecs = r.attrib.get('codecs')
                        if codecs:
                            # The MPD carries a single codecs string; attribute
                            # it to whichever track this itag is known to lack.
                            if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
                                full_info['vcodec'] = codecs
                            elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
                                full_info['acodec'] = codecs
                        formats.append(full_info)
                    else:
                        existing_format.update(f)
                else:
                    self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
        return formats
969
970 def _real_extract(self, url):
971 url, smuggled_data = unsmuggle_url(url, {})
972
973 proto = (
974 'http' if self._downloader.params.get('prefer_insecure', False)
975 else 'https')
976
977 start_time = None
978 end_time = None
979 parsed_url = compat_urllib_parse_urlparse(url)
980 for component in [parsed_url.fragment, parsed_url.query]:
981 query = compat_parse_qs(component)
982 if start_time is None and 't' in query:
983 start_time = parse_duration(query['t'][0])
984 if start_time is None and 'start' in query:
985 start_time = parse_duration(query['start'][0])
986 if end_time is None and 'end' in query:
987 end_time = parse_duration(query['end'][0])
988
989 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
990 mobj = re.search(self._NEXT_URL_RE, url)
991 if mobj:
992 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
993 video_id = self.extract_id(url)
994
995 # Get video webpage
996 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
997 video_webpage = self._download_webpage(url, video_id)
998
999 # Attempt to extract SWF player URL
1000 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1001 if mobj is not None:
1002 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1003 else:
1004 player_url = None
1005
1006 dash_mpds = []
1007
1008 def add_dash_mpd(video_info):
1009 dash_mpd = video_info.get('dashmpd')
1010 if dash_mpd and dash_mpd[0] not in dash_mpds:
1011 dash_mpds.append(dash_mpd[0])
1012
1013 # Get video info
1014 embed_webpage = None
1015 is_live = None
1016 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1017 age_gate = True
1018 # We simulate the access to the video from www.youtube.com/v/{video_id}
1019 # this can be viewed without login into Youtube
1020 url = proto + '://www.youtube.com/embed/%s' % video_id
1021 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1022 data = compat_urllib_parse.urlencode({
1023 'video_id': video_id,
1024 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1025 'sts': self._search_regex(
1026 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1027 })
1028 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1029 video_info_webpage = self._download_webpage(
1030 video_info_url, video_id,
1031 note='Refetching age-gated info webpage',
1032 errnote='unable to download video info webpage')
1033 video_info = compat_parse_qs(video_info_webpage)
1034 add_dash_mpd(video_info)
1035 else:
1036 age_gate = False
1037 video_info = None
1038 # Try looking directly into the video webpage
1039 mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
1040 if mobj:
1041 json_code = uppercase_escape(mobj.group(1))
1042 ytplayer_config = json.loads(json_code)
1043 args = ytplayer_config['args']
1044 if args.get('url_encoded_fmt_stream_map'):
1045 # Convert to the same format returned by compat_parse_qs
1046 video_info = dict((k, [v]) for k, v in args.items())
1047 add_dash_mpd(video_info)
1048 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1049 is_live = True
1050 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1051 # We also try looking in get_video_info since it may contain different dashmpd
1052 # URL that points to a DASH manifest with possibly different itag set (some itags
1053 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1054 # manifest pointed by get_video_info's dashmpd).
1055 # The general idea is to take a union of itags of both DASH manifests (for example
1056 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1057 self.report_video_info_webpage_download(video_id)
1058 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
1059 video_info_url = (
1060 '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1061 % (proto, video_id, el_type))
1062 video_info_webpage = self._download_webpage(
1063 video_info_url,
1064 video_id, note=False,
1065 errnote='unable to download video info webpage')
1066 get_video_info = compat_parse_qs(video_info_webpage)
1067 if get_video_info.get('use_cipher_signature') != ['True']:
1068 add_dash_mpd(get_video_info)
1069 if not video_info:
1070 video_info = get_video_info
1071 if 'token' in get_video_info:
1072 break
1073 if 'token' not in video_info:
1074 if 'reason' in video_info:
1075 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1076 regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
1077 if regions_allowed:
1078 raise ExtractorError('YouTube said: This video is available in %s only' % (
1079 ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1080 expected=True)
1081 raise ExtractorError(
1082 'YouTube said: %s' % video_info['reason'][0],
1083 expected=True, video_id=video_id)
1084 else:
1085 raise ExtractorError(
1086 '"token" parameter not in video info for unknown reason',
1087 video_id=video_id)
1088
1089 # title
1090 if 'title' in video_info:
1091 video_title = video_info['title'][0]
1092 else:
1093 self._downloader.report_warning('Unable to extract video title')
1094 video_title = '_'
1095
1096 # description
1097 video_description = get_element_by_id("eow-description", video_webpage)
1098 if video_description:
1099 video_description = re.sub(r'''(?x)
1100 <a\s+
1101 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1102 title="([^"]+)"\s+
1103 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1104 class="yt-uix-redirect-link"\s*>
1105 [^<]+
1106 </a>
1107 ''', r'\1', video_description)
1108 video_description = clean_html(video_description)
1109 else:
1110 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1111 if fd_mobj:
1112 video_description = unescapeHTML(fd_mobj.group(1))
1113 else:
1114 video_description = ''
1115
1116 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1117 if not self._downloader.params.get('noplaylist'):
1118 entries = []
1119 feed_ids = []
1120 multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
1121 for feed in multifeed_metadata_list.split(','):
1122 feed_data = compat_parse_qs(feed)
1123 entries.append({
1124 '_type': 'url_transparent',
1125 'ie_key': 'Youtube',
1126 'url': smuggle_url(
1127 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1128 {'force_singlefeed': True}),
1129 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1130 })
1131 feed_ids.append(feed_data['id'][0])
1132 self.to_screen(
1133 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1134 % (', '.join(feed_ids), video_id))
1135 return self.playlist_result(entries, video_id, video_title, video_description)
1136 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1137
1138 if 'view_count' in video_info:
1139 view_count = int(video_info['view_count'][0])
1140 else:
1141 view_count = None
1142
1143 # Check for "rental" videos
1144 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1145 raise ExtractorError('"rental" videos not supported')
1146
1147 # Start extracting information
1148 self.report_information_extraction(video_id)
1149
1150 # uploader
1151 if 'author' not in video_info:
1152 raise ExtractorError('Unable to extract uploader name')
1153 video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
1154
1155 # uploader_id
1156 video_uploader_id = None
1157 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1158 if mobj is not None:
1159 video_uploader_id = mobj.group(1)
1160 else:
1161 self._downloader.report_warning('unable to extract uploader nickname')
1162
1163 # thumbnail image
1164 # We try first to get a high quality image:
1165 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1166 video_webpage, re.DOTALL)
1167 if m_thumb is not None:
1168 video_thumbnail = m_thumb.group(1)
1169 elif 'thumbnail_url' not in video_info:
1170 self._downloader.report_warning('unable to extract video thumbnail')
1171 video_thumbnail = None
1172 else: # don't panic if we can't find it
1173 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1174
1175 # upload date
1176 upload_date = self._html_search_meta(
1177 'datePublished', video_webpage, 'upload date', default=None)
1178 if not upload_date:
1179 upload_date = self._search_regex(
1180 [r'(?s)id="eow-date.*?>(.*?)</span>',
1181 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1182 video_webpage, 'upload date', default=None)
1183 if upload_date:
1184 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1185 upload_date = unified_strdate(upload_date)
1186
1187 m_cat_container = self._search_regex(
1188 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1189 video_webpage, 'categories', default=None)
1190 if m_cat_container:
1191 category = self._html_search_regex(
1192 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1193 default=None)
1194 video_categories = None if category is None else [category]
1195 else:
1196 video_categories = None
1197
1198 video_tags = [
1199 unescapeHTML(m.group('content'))
1200 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1201
1202 def _extract_count(count_name):
1203 return str_to_int(self._search_regex(
1204 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1205 % re.escape(count_name),
1206 video_webpage, count_name, default=None))
1207
1208 like_count = _extract_count('like')
1209 dislike_count = _extract_count('dislike')
1210
1211 # subtitles
1212 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1213 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1214
1215 if 'length_seconds' not in video_info:
1216 self._downloader.report_warning('unable to extract video duration')
1217 video_duration = None
1218 else:
1219 video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
1220
1221 # annotations
1222 video_annotations = None
1223 if self._downloader.params.get('writeannotations', False):
1224 video_annotations = self._extract_annotations(video_id)
1225
1226 def _map_to_format_list(urlmap):
1227 formats = []
1228 for itag, video_real_url in urlmap.items():
1229 dct = {
1230 'format_id': itag,
1231 'url': video_real_url,
1232 'player_url': player_url,
1233 }
1234 if itag in self._formats:
1235 dct.update(self._formats[itag])
1236 formats.append(dct)
1237 return formats
1238
1239 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1240 self.report_rtmp_download()
1241 formats = [{
1242 'format_id': '_rtmp',
1243 'protocol': 'rtmp',
1244 'url': video_info['conn'][0],
1245 'player_url': player_url,
1246 }]
1247 elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
1248 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1249 if 'rtmpe%3Dyes' in encoded_url_map:
1250 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1251 url_map = {}
1252 for url_data_str in encoded_url_map.split(','):
1253 url_data = compat_parse_qs(url_data_str)
1254 if 'itag' not in url_data or 'url' not in url_data:
1255 continue
1256 format_id = url_data['itag'][0]
1257 url = url_data['url'][0]
1258
1259 if 'sig' in url_data:
1260 url += '&signature=' + url_data['sig'][0]
1261 elif 's' in url_data:
1262 encrypted_sig = url_data['s'][0]
1263 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1264
1265 jsplayer_url_json = self._search_regex(
1266 ASSETS_RE,
1267 embed_webpage if age_gate else video_webpage,
1268 'JS player URL (1)', default=None)
1269 if not jsplayer_url_json and not age_gate:
1270 # We need the embed website after all
1271 if embed_webpage is None:
1272 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1273 embed_webpage = self._download_webpage(
1274 embed_url, video_id, 'Downloading embed webpage')
1275 jsplayer_url_json = self._search_regex(
1276 ASSETS_RE, embed_webpage, 'JS player URL')
1277
1278 player_url = json.loads(jsplayer_url_json)
1279 if player_url is None:
1280 player_url_json = self._search_regex(
1281 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1282 video_webpage, 'age gate player URL')
1283 player_url = json.loads(player_url_json)
1284
1285 if self._downloader.params.get('verbose'):
1286 if player_url is None:
1287 player_version = 'unknown'
1288 player_desc = 'unknown'
1289 else:
1290 if player_url.endswith('swf'):
1291 player_version = self._search_regex(
1292 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1293 'flash player', fatal=False)
1294 player_desc = 'flash player %s' % player_version
1295 else:
1296 player_version = self._search_regex(
1297 r'html5player-([^/]+?)(?:/html5player)?\.js',
1298 player_url,
1299 'html5 player', fatal=False)
1300 player_desc = 'html5 player %s' % player_version
1301
1302 parts_sizes = self._signature_cache_id(encrypted_sig)
1303 self.to_screen('{%s} signature length %s, %s' %
1304 (format_id, parts_sizes, player_desc))
1305
1306 signature = self._decrypt_signature(
1307 encrypted_sig, video_id, player_url, age_gate)
1308 url += '&signature=' + signature
1309 if 'ratebypass' not in url:
1310 url += '&ratebypass=yes'
1311 url_map[format_id] = url
1312 formats = _map_to_format_list(url_map)
1313 elif video_info.get('hlsvp'):
1314 manifest_url = video_info['hlsvp'][0]
1315 url_map = self._extract_from_m3u8(manifest_url, video_id)
1316 formats = _map_to_format_list(url_map)
1317 else:
1318 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1319
1320 # Look for the DASH manifest
1321 if self._downloader.params.get('youtube_include_dash_manifest', True):
1322 dash_mpd_fatal = True
1323 for dash_manifest_url in dash_mpds:
1324 dash_formats = {}
1325 try:
1326 for df in self._parse_dash_manifest(
1327 video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
1328 # Do not overwrite DASH format found in some previous DASH manifest
1329 if df['format_id'] not in dash_formats:
1330 dash_formats[df['format_id']] = df
1331 # Additional DASH manifests may end up in HTTP Error 403 therefore
1332 # allow them to fail without bug report message if we already have
1333 # some DASH manifest succeeded. This is temporary workaround to reduce
1334 # burst of bug reports until we figure out the reason and whether it
1335 # can be fixed at all.
1336 dash_mpd_fatal = False
1337 except (ExtractorError, KeyError) as e:
1338 self.report_warning(
1339 'Skipping DASH manifest: %r' % e, video_id)
1340 if dash_formats:
1341 # Remove the formats we found through non-DASH, they
1342 # contain less info and it can be wrong, because we use
1343 # fixed values (for example the resolution). See
1344 # https://github.com/rg3/youtube-dl/issues/5774 for an
1345 # example.
1346 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
1347 formats.extend(dash_formats.values())
1348
1349 # Check for malformed aspect ratio
1350 stretched_m = re.search(
1351 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1352 video_webpage)
1353 if stretched_m:
1354 ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
1355 for f in formats:
1356 if f.get('vcodec') != 'none':
1357 f['stretched_ratio'] = ratio
1358
1359 self._sort_formats(formats)
1360
1361 return {
1362 'id': video_id,
1363 'uploader': video_uploader,
1364 'uploader_id': video_uploader_id,
1365 'upload_date': upload_date,
1366 'title': video_title,
1367 'thumbnail': video_thumbnail,
1368 'description': video_description,
1369 'categories': video_categories,
1370 'tags': video_tags,
1371 'subtitles': video_subtitles,
1372 'automatic_captions': automatic_captions,
1373 'duration': video_duration,
1374 'age_limit': 18 if age_gate else 0,
1375 'annotations': video_annotations,
1376 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1377 'view_count': view_count,
1378 'like_count': like_count,
1379 'dislike_count': dislike_count,
1380 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
1381 'formats': formats,
1382 'is_live': is_live,
1383 'start_time': start_time,
1384 'end_time': end_time,
1385 }
1386
1387
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
    }, {
        'note': 'embedded',
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        # Fix: was misspelled 'playlist_mincout', which silently disabled
        # the minimum-count check for this test.
        'playlist_mincount': 21,
    }]

    def _real_initialize(self):
        self._login()

    def _extract_mix(self, playlist_id):
        """Extract a YouTube mix, whose contents are generated on the fly."""
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)
        ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
            webpage))
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Extract a regular playlist, following 'Load more' pagination lazily."""
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
                raise ExtractorError(
                    'The playlist doesn\'t exist or is private, use --username or '
                    '--netrc to access it.',
                    expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        # Extract the video ids from the playlist pages
        def _entries():
            more_widget_html = content_html = page
            for page_num in itertools.count(1):
                matches = re.finditer(self._VIDEO_RE, content_html)
                # We remove the duplicates and the link with index 0
                # (it's not the first video of the playlist)
                new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
                for vid_id in new_ids:
                    yield self.url_result(vid_id, 'Youtube', video_id=vid_id)

                mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
                if not mobj:
                    break

                more = self._download_json(
                    'https://youtube.com/%s' % mobj.group('more'), playlist_id,
                    'Downloading page #%s' % page_num,
                    transform_source=uppercase_escape)
                content_html = more['content_html']
                if not content_html.strip():
                    # Some webpages show a "Load more" button but they don't
                    # have more videos
                    break
                more_widget_html = more['load_more_widget_html']

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
            page, 'title')

        return self.playlist_result(_entries(), playlist_id, playlist_title)

    def _real_extract(self, url):
        """Dispatch to mix or regular playlist extraction for *url*."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith(('RD', 'UL')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        return self._extract_playlist(playlist_id)
1580
1581
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }]

    @staticmethod
    def extract_videos_from_page(page):
        """Return (video_id, title) pairs scraped from a channel page.

        Duplicate ids are merged, keeping the first non-empty title seen.
        """
        ids_in_page = []
        titles_in_page = []
        for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
            try:
                # Already seen: upgrade its title if we only now found one.
                idx = ids_in_page.index(video_id)
                if video_title and not titles_in_page[idx]:
                    titles_in_page[idx] = video_title
            except ValueError:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
        return zip(ids_in_page, titles_in_page)

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        url = self._TEMPLATE_URL % channel_id

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = self._html_search_meta(
            'channelId', channel_page, 'channel id', default=None)
        if not channel_playlist_id:
            channel_playlist_id = self._search_regex(
                r'data-channel-external-id="([^"]+)"',
                channel_page, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # A 'UC…' channel id maps to the 'UU…' uploads playlist.
            playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = [
                self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title)
                for video_id, video_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        def _entries():
            # Lazily walk the "Load more" AJAX pagination.
            more_widget_html = content_html = channel_page
            for pagenum in itertools.count(1):

                for video_id, video_title in self.extract_videos_from_page(content_html):
                    yield self.url_result(
                        video_id, 'Youtube', video_id=video_id,
                        video_title=video_title)

                mobj = re.search(
                    r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
                    more_widget_html)
                if not mobj:
                    break

                more = self._download_json(
                    'https://youtube.com/%s' % mobj.group('more'), channel_id,
                    'Downloading page #%s' % (pagenum + 1),
                    transform_source=uppercase_escape)
                content_html = more['content_html']
                more_widget_html = more['load_more_widget_html']

        return self.playlist_result(_entries(), channel_id)
1674
1675
class YoutubeUserIE(YoutubeChannelIE):
    """Extract the videos of a YouTube user (same machinery as channels)."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # _VALID_URL above is deliberately loose, so give every other
        # YouTube extractor the first shot at the URL and only claim it
        # when none of them match.
        for name, klass in globals().items():
            if name.endswith('IE') and klass is not cls and klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)
1702
1703
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Pages through the results feed, collecting video entries until
        either n results have been gathered or a page yields nothing
        new, and returns a playlist truncated to at most n entries.
        """
        videos = []
        limit = n

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query.encode('utf-8'),
                'page': pagenum,
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as we have enough results or the page brought
            # nothing new.  This was previously 'len(videos) > limit',
            # which always downloaded one page more than necessary when
            # the result count landed exactly on the limit.
            if not new_videos or len(videos) >= limit:
                break

        # Slicing is a no-op when fewer than n results were collected.
        return self.playlist_result(videos[:n], query)
1747
1748
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor variant ordering results by upload date."""
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
1754
1755
class YoutubeSearchURLIE(InfoExtractor):
    """Extract the result list of a YouTube search-results URL."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }]

    def _real_extract(self, url):
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))
        webpage = self._download_webpage(url, query)

        # The results live inside the "item-section" <ol>.
        result_code = self._search_regex(
            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')

        entries = []
        for part_code in re.findall(
                r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code):
            part_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
            part_url_snippet = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            entries.append({
                '_type': 'url',
                'url': compat_urlparse.urljoin('https://www.youtube.com/', part_url_snippet),
                'title': part_title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
1797
1798
class YoutubeShowIE(InfoExtractor):
    """Extract a multi-season show as a playlist of season playlists."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'http://www.youtube.com/show/airdisasters',
        'playlist_mincount': 3,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading show webpage')
        # There's one playlist for each season of the show
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen('%s: Found %s seasons' % (playlist_id, len(season_matches)))

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': self._og_search_title(webpage, fatal=False),
            'entries': [
                self.url_result(
                    'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
                for season in season_matches
            ],
        }
1833
1834
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Page through the logged-in feed and return it as a playlist."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos.
            # NOTE: this must be a list, not filter(): on Python 3 filter()
            # returns a lazy iterator that is always truthy, so the
            # emptiness check below could never fire (and ids.extend()
            # would consume the iterator anyway).
            new_ids = [video_id for video_id in orderedSet(matches) if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
1882
1883
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extract the authenticated user's Watch Later list."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'
    # The parent class ships playlist tests that do not apply here.
    _TESTS = []

    def _real_extract(self, url):
        # Watch Later is always the special 'WL' playlist, regardless of URL.
        return self._extract_playlist('WL')
1893
1894
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the favourites page to its backing playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        webpage = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The page embeds the id of the favourites playlist; hand it off
        # to the playlist extractor.
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', webpage, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
1905
1906
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's recommended videos."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
1912
1913
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's subscriptions feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
1919
1920
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's watch history."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string: '\.' is an invalid escape in a plain str (DeprecationWarning
    # on Python 3.6+) and every sibling class uses r'' for _VALID_URL.
    # The matched pattern is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
1926
1927
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs whose v= parameter was lost (typically
    because the URL was pasted into a shell unquoted) and fail with a
    helpful message instead of a cryptic error."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        message = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(message, expected=True)
1971
1972
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than the canonical 11
    characters and fail with an explanatory error."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)