jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5
	6	import itertools
	7	import json
	8	import os.path
	9	import re
	10	import time
	11	import traceback
	12
	13	from .common import InfoExtractor, SearchInfoExtractor
	14	from ..jsinterp import JSInterpreter
	15	from ..swfinterp import SWFInterpreter
	16	from ..compat import (
	17	compat_chr,
	18	compat_parse_qs,
	19	compat_urllib_parse,
	20	compat_urllib_request,
	21	compat_urlparse,
	22	compat_str,
	23	)
	24	from ..utils import (
	25	clean_html,
	26	ExtractorError,
	27	float_or_none,
	28	get_element_by_attribute,
	29	get_element_by_id,
	30	int_or_none,
	31	orderedSet,
	32	unescapeHTML,
	33	unified_strdate,
	34	uppercase_escape,
	35	)
	36
	37
	38	class YoutubeBaseInfoExtractor(InfoExtractor):
	39	"""Provide base functions for Youtube extractors"""
	40	_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
	41	_TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
	42	_NETRC_MACHINE = 'youtube'
	43	# If True it will raise an error if no login info is provided
	44	_LOGIN_REQUIRED = False
	45
	46	def _set_language(self):
	47	self._set_cookie(
	48	'.youtube.com', 'PREF', 'f1=50000000&hl=en',
	49	# YouTube sets the expire time to about two months
	50	expire_time=time.time() + 2 * 30 * 24 * 3600)
	51
	52	def _ids_to_results(self, ids):
	53	return [
	54	self.url_result(vid_id, 'Youtube', video_id=vid_id)
	55	for vid_id in ids]
	56
	57	def _login(self):
	58	"""
	59	Attempt to log in to YouTube.
	60	True is returned if successful or skipped.
	61	False is returned if login failed.
	62
	63	If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
	64	"""
	65	(username, password) = self._get_login_info()
	66	# No authentication to be performed
	67	if username is None:
	68	if self._LOGIN_REQUIRED:
	69	raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
	70	return True
	71
	72	login_page = self._download_webpage(
	73	self._LOGIN_URL, None,
	74	note='Downloading login page',
	75	errnote='unable to fetch login page', fatal=False)
	76	if login_page is False:
	77	return
	78
	79	galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
	80	login_page, 'Login GALX parameter')
	81
	82	# Log in
	83	login_form_strs = {
	84	'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
	85	'Email': username,
	86	'GALX': galx,
	87	'Passwd': password,
	88
	89	'PersistentCookie': 'yes',
	90	'_utf8': '霱',
	91	'bgresponse': 'js_disabled',
	92	'checkConnection': '',
	93	'checkedDomains': 'youtube',
	94	'dnConn': '',
	95	'pstMsg': '0',
	96	'rmShown': '1',
	97	'secTok': '',
	98	'signIn': 'Sign in',
	99	'timeStmp': '',
	100	'service': 'youtube',
	101	'uilel': '3',
	102	'hl': 'en_US',
	103	}
	104
	105	# Convert to UTF-8 before urlencode because Python 2.x's urlencode
	106	# chokes on unicode
	107	login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
	108	login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
	109
	110	req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
	111	login_results = self._download_webpage(
	112	req, None,
	113	note='Logging in', errnote='unable to log in', fatal=False)
	114	if login_results is False:
	115	return False
	116
	117	if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
	118	raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
	119
	120	# Two-Factor
	121	# TODO add SMS and phone call support - these require making a request and then prompting the user
	122
	123	if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
	124	tfa_code = self._get_tfa_info()
	125
	126	if tfa_code is None:
	127	self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
	128	self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
	129	return False
	130
	131	# Unlike the first login form, secTok and timeStmp are both required for the TFA form
	132
	133	match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M \| re.U)
	134	if match is None:
	135	self._downloader.report_warning('Failed to get secTok - did the page structure change?')
	136	secTok = match.group(1)
	137	match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M \| re.U)
	138	if match is None:
	139	self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
	140	timeStmp = match.group(1)
	141
	142	tfa_form_strs = {
	143	'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
	144	'smsToken': '',
	145	'smsUserPin': tfa_code,
	146	'smsVerifyPin': 'Verify',
	147
	148	'PersistentCookie': 'yes',
	149	'checkConnection': '',
	150	'checkedDomains': 'youtube',
	151	'pstMsg': '1',
	152	'secTok': secTok,
	153	'timeStmp': timeStmp,
	154	'service': 'youtube',
	155	'hl': 'en_US',
	156	}
	157	tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
	158	tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
	159
	160	tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
	161	tfa_results = self._download_webpage(
	162	tfa_req, None,
	163	note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
	164
	165	if tfa_results is False:
	166	return False
	167
	168	if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
	169	self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
	170	return False
	171	if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
	172	self._downloader.report_warning('unable to log in - did the page structure change?')
	173	return False
	174	if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
	175	self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
	176	return False
	177
	178	if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
	179	self._downloader.report_warning('unable to log in: bad username or password')
	180	return False
	181	return True
	182
	183	def _real_initialize(self):
	184	if self._downloader is None:
	185	return
	186	self._set_language()
	187	if not self._login():
	188	return
	189
	190
	191	class YoutubeIE(YoutubeBaseInfoExtractor):
	192	IE_DESC = 'YouTube.com'
	193	_VALID_URL = r"""(?x)^
	194	(
	195	(?:https?://\|//) # http(s):// or protocol-independent URL
	196	(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/\|
	197	(?:www\.)?deturl\.com/www\.youtube\.com/\|
	198	(?:www\.)?pwnyoutube\.com/\|
	199	(?:www\.)?yourepeat\.com/\|
	200	tube\.majestyc\.net/\|
	201	youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
	202	(?:.*?\#/)? # handle anchor (#/) redirect urls
	203	(?: # the various things that can precede the ID:
	204	(?:(?:v\|embed\|e)/(?!videoseries)) # v/ or embed/ or e/
	205	\|(?: # or the v= param in all its forms
	206	(?:(?:watch\|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup\|.php) or nothing (like /?v=xxxx)
	207	(?:\?\|\#!?) # the params delimiter ? or # or #!
	208	(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
	209	v=
	210	)
	211	))
	212	\|youtu\.be/ # just youtu.be/xxxx
	213	\|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
	214	)
	215	)? # all until now is optional -> you can pass the naked ID
	216	([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
	217	(?!.*?&list=) # combined list/video URLs are handled by the playlist IE
	218	(?(1).+)? # if we found the ID, everything can follow
	219	$"""
	220	_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
	221	_formats = {
	222	'5': {'ext': 'flv', 'width': 400, 'height': 240},
	223	'6': {'ext': 'flv', 'width': 450, 'height': 270},
	224	'13': {'ext': '3gp'},
	225	'17': {'ext': '3gp', 'width': 176, 'height': 144},
	226	'18': {'ext': 'mp4', 'width': 640, 'height': 360},
	227	'22': {'ext': 'mp4', 'width': 1280, 'height': 720},
	228	'34': {'ext': 'flv', 'width': 640, 'height': 360},
	229	'35': {'ext': 'flv', 'width': 854, 'height': 480},
	230	'36': {'ext': '3gp', 'width': 320, 'height': 240},
	231	'37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
	232	'38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
	233	'43': {'ext': 'webm', 'width': 640, 'height': 360},
	234	'44': {'ext': 'webm', 'width': 854, 'height': 480},
	235	'45': {'ext': 'webm', 'width': 1280, 'height': 720},
	236	'46': {'ext': 'webm', 'width': 1920, 'height': 1080},
	237
	238
	239	# 3d videos
	240	'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
	241	'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
	242	'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
	243	'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
	244	'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
	245	'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
	246	'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
	247
	248	# Apple HTTP Live Streaming
	249	'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
	250	'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
	251	'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
	252	'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
	253	'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
	254	'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
	255	'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
	256
	257	# DASH mp4 video
	258	'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	259	'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	260	'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	261	'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	262	'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	263	'138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
	264	'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	265	'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	266	'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
	267	'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
	268	'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
	269
	270	# Dash mp4 audio
	271	'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
	272	'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
	273	'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
	274
	275	# Dash webm
	276	'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	277	'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	278	'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	279	'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	280	'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	281	'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
	282	'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'VP9'},
	283	'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	284	'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	285	'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	286	'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	287	'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	288	'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	289	'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	290	'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	291	'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
	292	'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
	293	'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
	294	'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
	295	'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
	296	'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
	297
	298	# Dash webm audio
	299	'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
	300	'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
	301
	302	# Dash webm audio with opus inside
	303	'249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
	304	'250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
	305	'251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
	306
	307	# RTMP (unnamed)
	308	'_rtmp': {'protocol': 'rtmp'},
	309	}
	310
	311	IE_NAME = 'youtube'
	312	_TESTS = [
	313	{
	314	'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
	315	'info_dict': {
	316	'id': 'BaW_jenozKc',
	317	'ext': 'mp4',
	318	'title': 'youtube-dl test video "\'/\\ä↭𝕐',
	319	'uploader': 'Philipp Hagemeister',
	320	'uploader_id': 'phihag',
	321	'upload_date': '20121002',
	322	'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
	323	'categories': ['Science & Technology'],
	324	'like_count': int,
	325	'dislike_count': int,
	326	}
	327	},
	328	{
	329	'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
	330	'note': 'Test generic use_cipher_signature video (#897)',
	331	'info_dict': {
	332	'id': 'UxxajLWwzqY',
	333	'ext': 'mp4',
	334	'upload_date': '20120506',
	335	'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
	336	'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
	337	'uploader': 'Icona Pop',
	338	'uploader_id': 'IconaPop',
	339	}
	340	},
	341	{
	342	'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
	343	'note': 'Test VEVO video with age protection (#956)',
	344	'info_dict': {
	345	'id': '07FYdnEawAQ',
	346	'ext': 'mp4',
	347	'upload_date': '20130703',
	348	'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
	349	'description': 'md5:64249768eec3bc4276236606ea996373',
	350	'uploader': 'justintimberlakeVEVO',
	351	'uploader_id': 'justintimberlakeVEVO',
	352	}
	353	},
	354	{
	355	'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
	356	'note': 'Embed-only video (#1746)',
	357	'info_dict': {
	358	'id': 'yZIXLfi8CZQ',
	359	'ext': 'mp4',
	360	'upload_date': '20120608',
	361	'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
	362	'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
	363	'uploader': 'SET India',
	364	'uploader_id': 'setindia'
	365	}
	366	},
	367	{
	368	'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
	369	'note': '256k DASH audio (format 141) via DASH manifest',
	370	'info_dict': {
	371	'id': 'a9LDPn-MO4I',
	372	'ext': 'm4a',
	373	'upload_date': '20121002',
	374	'uploader_id': '8KVIDEO',
	375	'description': '',
	376	'uploader': '8KVIDEO',
	377	'title': 'UHDTV TEST 8K VIDEO.mp4'
	378	},
	379	'params': {
	380	'youtube_include_dash_manifest': True,
	381	'format': '141',
	382	},
	383	},
	384	# DASH manifest with encrypted signature
	385	{
	386	'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
	387	'info_dict': {
	388	'id': 'IB3lcPjvWLA',
	389	'ext': 'm4a',
	390	'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
	391	'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
	392	'uploader': 'AfrojackVEVO',
	393	'uploader_id': 'AfrojackVEVO',
	394	'upload_date': '20131011',
	395	},
	396	'params': {
	397	'youtube_include_dash_manifest': True,
	398	'format': '141',
	399	},
	400	},
	401	# JS player signature function name containing $
	402	{
	403	'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
	404	'info_dict': {
	405	'id': 'nfWlot6h_JM',
	406	'ext': 'm4a',
	407	'title': 'Taylor Swift - Shake It Off',
	408	'description': 'md5:2acfda1b285bdd478ccec22f9918199d',
	409	'uploader': 'TaylorSwiftVEVO',
	410	'uploader_id': 'TaylorSwiftVEVO',
	411	'upload_date': '20140818',
	412	},
	413	'params': {
	414	'youtube_include_dash_manifest': True,
	415	'format': '141',
	416	},
	417	},
	418	# Controversy video
	419	{
	420	'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
	421	'info_dict': {
	422	'id': 'T4XJQO3qol8',
	423	'ext': 'mp4',
	424	'upload_date': '20100909',
	425	'uploader': 'The Amazing Atheist',
	426	'uploader_id': 'TheAmazingAtheist',
	427	'title': 'Burning Everyone\'s Koran',
	428	'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
	429	}
	430	},
	431	# Normal age-gate video (No vevo, embed allowed)
	432	{
	433	'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
	434	'info_dict': {
	435	'id': 'HtVdAasjOgU',
	436	'ext': 'mp4',
	437	'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
	438	'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
	439	'uploader': 'The Witcher',
	440	'uploader_id': 'WitcherGame',
	441	'upload_date': '20140605',
	442	},
	443	},
	444	# Age-gate video with encrypted signature
	445	{
	446	'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
	447	'info_dict': {
	448	'id': '6kLq3WMV1nU',
	449	'ext': 'mp4',
	450	'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
	451	'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
	452	'uploader': 'LloydVEVO',
	453	'uploader_id': 'LloydVEVO',
	454	'upload_date': '20110629',
	455	},
	456	},
	457	# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
	458	{
	459	'url': '__2ABJjxzNo',
	460	'info_dict': {
	461	'id': '__2ABJjxzNo',
	462	'ext': 'mp4',
	463	'upload_date': '20100430',
	464	'uploader_id': 'deadmau5',
	465	'description': 'md5:12c56784b8032162bb936a5f76d55360',
	466	'uploader': 'deadmau5',
	467	'title': 'Deadmau5 - Some Chords (HD)',
	468	},
	469	'expected_warnings': [
	470	'DASH manifest missing',
	471	]
	472	},
	473	# Olympics (https://github.com/rg3/youtube-dl/issues/4431)
	474	{
	475	'url': 'lqQg6PlCWgI',
	476	'info_dict': {
	477	'id': 'lqQg6PlCWgI',
	478	'ext': 'mp4',
	479	'upload_date': '20120731',
	480	'uploader_id': 'olympic',
	481	'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
	482	'uploader': 'Olympics',
	483	'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
	484	},
	485	'params': {
	486	'skip_download': 'requires avconv',
	487	}
	488	},
	489	# Non-square pixels
	490	{
	491	'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
	492	'info_dict': {
	493	'id': '_b-2C3KPAM0',
	494	'ext': 'mp4',
	495	'stretched_ratio': 16 / 9.,
	496	'upload_date': '20110310',
	497	'uploader_id': 'AllenMeow',
	498	'description': 'made by Wacom from Korea \| 字幕&加油添醋 by TY\'s Allen \| 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
	499	'uploader': '孫艾倫',
	500	'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',
	501	},
	502	},
	503	# url_encoded_fmt_stream_map is empty string
	504	{
	505	'url': 'qEJwOuvDf7I',
	506	'info_dict': {
	507	'id': 'qEJwOuvDf7I',
	508	'ext': 'mp4',
	509	'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
	510	'description': '',
	511	'upload_date': '20150404',
	512	'uploader_id': 'spbelect',
	513	'uploader': 'Наблюдатели Петербурга',
	514	},
	515	'params': {
	516	'skip_download': 'requires avconv',
	517	}
	518	},
	519	]
	520
	521	def __init__(self, args, *kwargs):
	522	super(YoutubeIE, self).__init__(args, *kwargs)
	523	self._player_cache = {}
	524
	525	def report_video_info_webpage_download(self, video_id):
	526	"""Report attempt to download video info webpage."""
	527	self.to_screen('%s: Downloading video info webpage' % video_id)
	528
	529	def report_information_extraction(self, video_id):
	530	"""Report attempt to extract video information."""
	531	self.to_screen('%s: Extracting video information' % video_id)
	532
	533	def report_unavailable_format(self, video_id, format):
	534	"""Report extracted video URL."""
	535	self.to_screen('%s: Format %s not available' % (video_id, format))
	536
	537	def report_rtmp_download(self):
	538	"""Indicate the download will use the RTMP protocol."""
	539	self.to_screen('RTMP download detected')
	540
	541	def _signature_cache_id(self, example_sig):
	542	""" Return a string representation of a signature """
	543	return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
	544
	545	def _extract_signature_function(self, video_id, player_url, example_sig):
	546	id_m = re.match(
	547	r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3\|/html5player)?\.(?P<ext>[a-z]+)$',
	548	player_url)
	549	if not id_m:
	550	raise ExtractorError('Cannot identify player %r' % player_url)
	551	player_type = id_m.group('ext')
	552	player_id = id_m.group('id')
	553
	554	# Read from filesystem cache
	555	func_id = '%s_%s_%s' % (
	556	player_type, player_id, self._signature_cache_id(example_sig))
	557	assert os.path.basename(func_id) == func_id
	558
	559	cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
	560	if cache_spec is not None:
	561	return lambda s: ''.join(s[i] for i in cache_spec)
	562
	563	download_note = (
	564	'Downloading player %s' % player_url
	565	if self._downloader.params.get('verbose') else
	566	'Downloading %s player %s' % (player_type, player_id)
	567	)
	568	if player_type == 'js':
	569	code = self._download_webpage(
	570	player_url, video_id,
	571	note=download_note,
	572	errnote='Download of %s failed' % player_url)
	573	res = self._parse_sig_js(code)
	574	elif player_type == 'swf':
	575	urlh = self._request_webpage(
	576	player_url, video_id,
	577	note=download_note,
	578	errnote='Download of %s failed' % player_url)
	579	code = urlh.read()
	580	res = self._parse_sig_swf(code)
	581	else:
	582	assert False, 'Invalid player type %r' % player_type
	583
	584	test_string = ''.join(map(compat_chr, range(len(example_sig))))
	585	cache_res = res(test_string)
	586	cache_spec = [ord(c) for c in cache_res]
	587
	588	self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
	589	return res
	590
	591	def _print_sig_code(self, func, example_sig):
	592	def gen_sig_code(idxs):
	593	def _genslice(start, end, step):
	594	starts = '' if start == 0 else str(start)
	595	ends = (':%d' % (end + step)) if end + step >= 0 else ':'
	596	steps = '' if step == 1 else (':%d' % step)
	597	return 's[%s%s%s]' % (starts, ends, steps)
	598
	599	step = None
	600	# Quelch pyflakes warnings - start will be set when step is set
	601	start = '(Never used)'
	602	for i, prev in zip(idxs[1:], idxs[:-1]):
	603	if step is not None:
	604	if i - prev == step:
	605	continue
	606	yield _genslice(start, prev, step)
	607	step = None
	608	continue
	609	if i - prev in [-1, 1]:
	610	step = i - prev
	611	start = prev
	612	continue
	613	else:
	614	yield 's[%d]' % prev
	615	if step is None:
	616	yield 's[%d]' % i
	617	else:
	618	yield _genslice(start, i, step)
	619
	620	test_string = ''.join(map(compat_chr, range(len(example_sig))))
	621	cache_res = func(test_string)
	622	cache_spec = [ord(c) for c in cache_res]
	623	expr_code = ' + '.join(gen_sig_code(cache_spec))
	624	signature_id_tuple = '(%s)' % (
	625	', '.join(compat_str(len(p)) for p in example_sig.split('.')))
	626	code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
	627	' return %s\n') % (signature_id_tuple, expr_code)
	628	self.to_screen('Extracted signature function:\n' + code)
	629
	630	def _parse_sig_js(self, jscode):
	631	funcname = self._search_regex(
	632	r'\.sig\\|\\|([a-zA-Z0-9$]+)\(', jscode,
	633	'Initial JS player signature function name')
	634
	635	jsi = JSInterpreter(jscode)
	636	initial_function = jsi.extract_function(funcname)
	637	return lambda s: initial_function([s])
	638
	639	def _parse_sig_swf(self, file_contents):
	640	swfi = SWFInterpreter(file_contents)
	641	TARGET_CLASSNAME = 'SignatureDecipher'
	642	searched_class = swfi.extract_class(TARGET_CLASSNAME)
	643	initial_function = swfi.extract_function(searched_class, 'decipher')
	644	return lambda s: initial_function([s])
	645
	646	def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
	647	"""Turn the encrypted s field into a working signature"""
	648
	649	if player_url is None:
	650	raise ExtractorError('Cannot decrypt signature without player_url')
	651
	652	if player_url.startswith('//'):
	653	player_url = 'https:' + player_url
	654	try:
	655	player_id = (player_url, self._signature_cache_id(s))
	656	if player_id not in self._player_cache:
	657	func = self._extract_signature_function(
	658	video_id, player_url, s
	659	)
	660	self._player_cache[player_id] = func
	661	func = self._player_cache[player_id]
	662	if self._downloader.params.get('youtube_print_sig_code'):
	663	self._print_sig_code(func, s)
	664	return func(s)
	665	except Exception as e:
	666	tb = traceback.format_exc()
	667	raise ExtractorError(
	668	'Signature extraction failed: ' + tb, cause=e)
	669
	670	def _get_subtitles(self, video_id, webpage):
	671	try:
	672	subs_doc = self._download_xml(
	673	'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
	674	video_id, note=False)
	675	except ExtractorError as err:
	676	self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
	677	return {}
	678
	679	sub_lang_list = {}
	680	for track in subs_doc.findall('track'):
	681	lang = track.attrib['lang_code']
	682	if lang in sub_lang_list:
	683	continue
	684	sub_formats = []
	685	for ext in ['sbv', 'vtt', 'srt']:
	686	params = compat_urllib_parse.urlencode({
	687	'lang': lang,
	688	'v': video_id,
	689	'fmt': ext,
	690	'name': track.attrib['name'].encode('utf-8'),
	691	})
	692	sub_formats.append({
	693	'url': 'https://www.youtube.com/api/timedtext?' + params,
	694	'ext': ext,
	695	})
	696	sub_lang_list[lang] = sub_formats
	697	if not sub_lang_list:
	698	self._downloader.report_warning('video doesn\'t have subtitles')
	699	return {}
	700	return sub_lang_list
	701
	702	def _get_automatic_captions(self, video_id, webpage):
	703	"""We need the webpage for getting the captions url, pass it as an
	704	argument to speed up the process."""
	705	self.to_screen('%s: Looking for automatic captions' % video_id)
	706	mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
	707	err_msg = 'Couldn\'t find automatic captions for %s' % video_id
	708	if mobj is None:
	709	self._downloader.report_warning(err_msg)
	710	return {}
	711	player_config = json.loads(mobj.group(1))
	712	try:
	713	args = player_config['args']
	714	caption_url = args['ttsurl']
	715	timestamp = args['timestamp']
	716	# We get the available subtitles
	717	list_params = compat_urllib_parse.urlencode({
	718	'type': 'list',
	719	'tlangs': 1,
	720	'asrs': 1,
	721	})
	722	list_url = caption_url + '&' + list_params
	723	caption_list = self._download_xml(list_url, video_id)
	724	original_lang_node = caption_list.find('track')
	725	if original_lang_node is None:
	726	self._downloader.report_warning('Video doesn\'t have automatic captions')
	727	return {}
	728	original_lang = original_lang_node.attrib['lang_code']
	729	caption_kind = original_lang_node.attrib.get('kind', '')
	730
	731	sub_lang_list = {}
	732	for lang_node in caption_list.findall('target'):
	733	sub_lang = lang_node.attrib['lang_code']
	734	sub_formats = []
	735	for ext in ['sbv', 'vtt', 'srt']:
	736	params = compat_urllib_parse.urlencode({
	737	'lang': original_lang,
	738	'tlang': sub_lang,
	739	'fmt': ext,
	740	'ts': timestamp,
	741	'kind': caption_kind,
	742	})
	743	sub_formats.append({
	744	'url': caption_url + '&' + params,
	745	'ext': ext,
	746	})
	747	sub_lang_list[sub_lang] = sub_formats
	748	return sub_lang_list
	749	# An extractor error can be raise by the download process if there are
	750	# no automatic captions but there are subtitles
	751	except (KeyError, ExtractorError):
	752	self._downloader.report_warning(err_msg)
	753	return {}
	754
	755	@classmethod
	756	def extract_id(cls, url):
	757	mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
	758	if mobj is None:
	759	raise ExtractorError('Invalid URL: %s' % url)
	760	video_id = mobj.group(2)
	761	return video_id
	762
	763	def _extract_from_m3u8(self, manifest_url, video_id):
	764	url_map = {}
	765
	766	def _get_urls(_manifest):
	767	lines = _manifest.split('\n')
	768	urls = filter(lambda l: l and not l.startswith('#'),
	769	lines)
	770	return urls
	771	manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
	772	formats_urls = _get_urls(manifest)
	773	for format_url in formats_urls:
	774	itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
	775	url_map[itag] = format_url
	776	return url_map
	777
	778	def _extract_annotations(self, video_id):
	779	url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
	780	return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
	781
	782	def _parse_dash_manifest(
	783	self, video_id, dash_manifest_url, player_url, age_gate):
	784	def decrypt_sig(mobj):
	785	s = mobj.group(1)
	786	dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
	787	return '/signature/%s' % dec_s
	788	dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
	789	dash_doc = self._download_xml(
	790	dash_manifest_url, video_id,
	791	note='Downloading DASH manifest',
	792	errnote='Could not download DASH manifest')
	793
	794	formats = []
	795	for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
	796	mime_type = a.attrib.get('mimeType')
	797	for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
	798	url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
	799	if url_el is None:
	800	continue
	801	if mime_type == 'text/vtt':
	802	# TODO implement WebVTT downloading
	803	pass
	804	elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
	805	format_id = r.attrib['id']
	806	video_url = url_el.text
	807	filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
	808	f = {
	809	'format_id': format_id,
	810	'url': video_url,
	811	'width': int_or_none(r.attrib.get('width')),
	812	'height': int_or_none(r.attrib.get('height')),
	813	'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
	814	'asr': int_or_none(r.attrib.get('audioSamplingRate')),
	815	'filesize': filesize,
	816	'fps': int_or_none(r.attrib.get('frameRate')),
	817	}
	818	try:
	819	existing_format = next(
	820	fo for fo in formats
	821	if fo['format_id'] == format_id)
	822	except StopIteration:
	823	full_info = self._formats.get(format_id, {}).copy()
	824	full_info.update(f)
	825	formats.append(full_info)
	826	else:
	827	existing_format.update(f)
	828	else:
	829	self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
	830	return formats
	831
	832	def _real_extract(self, url):
	833	proto = (
	834	'http' if self._downloader.params.get('prefer_insecure', False)
	835	else 'https')
	836
	837	# Extract original video URL from URL with redirection, like age verification, using next_url parameter
	838	mobj = re.search(self._NEXT_URL_RE, url)
	839	if mobj:
	840	url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
	841	video_id = self.extract_id(url)
	842
	843	# Get video webpage
	844	url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
	845	video_webpage = self._download_webpage(url, video_id)
	846
	847	# Attempt to extract SWF player URL
	848	mobj = re.search(r'swfConfig.?"(https?:\\/\\/.?watch.?-.?\.swf)"', video_webpage)
	849	if mobj is not None:
	850	player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
	851	else:
	852	player_url = None
	853
	854	# Get video info
	855	embed_webpage = None
	856	if re.search(r'player-age-gate-content">', video_webpage) is not None:
	857	age_gate = True
	858	# We simulate the access to the video from www.youtube.com/v/{video_id}
	859	# this can be viewed without login into Youtube
	860	url = proto + '://www.youtube.com/embed/%s' % video_id
	861	embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
	862	data = compat_urllib_parse.urlencode({
	863	'video_id': video_id,
	864	'eurl': 'https://youtube.googleapis.com/v/' + video_id,
	865	'sts': self._search_regex(
	866	r'"sts"\s:\s(\d+)', embed_webpage, 'sts', default=''),
	867	})
	868	video_info_url = proto + '://www.youtube.com/get_video_info?' + data
	869	video_info_webpage = self._download_webpage(
	870	video_info_url, video_id,
	871	note='Refetching age-gated info webpage',
	872	errnote='unable to download video info webpage')
	873	video_info = compat_parse_qs(video_info_webpage)
	874	else:
	875	age_gate = False
	876	try:
	877	# Try looking directly into the video webpage
	878	mobj = re.search(r';ytplayer\.config\s=\s({.*?});', video_webpage)
	879	if not mobj:
	880	raise ValueError('Could not find ytplayer.config') # caught below
	881	json_code = uppercase_escape(mobj.group(1))
	882	ytplayer_config = json.loads(json_code)
	883	args = ytplayer_config['args']
	884	# Convert to the same format returned by compat_parse_qs
	885	video_info = dict((k, [v]) for k, v in args.items())
	886	if not args.get('url_encoded_fmt_stream_map'):
	887	raise ValueError('No stream_map present') # caught below
	888	except ValueError:
	889	# We fallback to the get_video_info pages (used by the embed page)
	890	self.report_video_info_webpage_download(video_id)
	891	for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
	892	video_info_url = (
	893	'%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
	894	% (proto, video_id, el_type))
	895	video_info_webpage = self._download_webpage(
	896	video_info_url,
	897	video_id, note=False,
	898	errnote='unable to download video info webpage')
	899	video_info = compat_parse_qs(video_info_webpage)
	900	if 'token' in video_info:
	901	break
	902	if 'token' not in video_info:
	903	if 'reason' in video_info:
	904	raise ExtractorError(
	905	'YouTube said: %s' % video_info['reason'][0],
	906	expected=True, video_id=video_id)
	907	else:
	908	raise ExtractorError(
	909	'"token" parameter not in video info for unknown reason',
	910	video_id=video_id)
	911
	912	if 'view_count' in video_info:
	913	view_count = int(video_info['view_count'][0])
	914	else:
	915	view_count = None
	916
	917	# Check for "rental" videos
	918	if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
	919	raise ExtractorError('"rental" videos not supported')
	920
	921	# Start extracting information
	922	self.report_information_extraction(video_id)
	923
	924	# uploader
	925	if 'author' not in video_info:
	926	raise ExtractorError('Unable to extract uploader name')
	927	video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
	928
	929	# uploader_id
	930	video_uploader_id = None
	931	mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user\|channel)/([^"]+)">', video_webpage)
	932	if mobj is not None:
	933	video_uploader_id = mobj.group(1)
	934	else:
	935	self._downloader.report_warning('unable to extract uploader nickname')
	936
	937	# title
	938	if 'title' in video_info:
	939	video_title = video_info['title'][0]
	940	else:
	941	self._downloader.report_warning('Unable to extract video title')
	942	video_title = '_'
	943
	944	# thumbnail image
	945	# We try first to get a high quality image:
	946	m_thumb = re.search(r'<span itemprop="thumbnail".?href="(.?)">',
	947	video_webpage, re.DOTALL)
	948	if m_thumb is not None:
	949	video_thumbnail = m_thumb.group(1)
	950	elif 'thumbnail_url' not in video_info:
	951	self._downloader.report_warning('unable to extract video thumbnail')
	952	video_thumbnail = None
	953	else: # don't panic if we can't find it
	954	video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
	955
	956	# upload date
	957	upload_date = None
	958	mobj = re.search(r'(?s)id="eow-date.?>(.?)</span>', video_webpage)
	959	if mobj is None:
	960	mobj = re.search(
	961	r'(?s)id="watch-uploader-info".?>.?(?:Published\|Uploaded\|Streamed live) on (.*?)</strong>',
	962	video_webpage)
	963	if mobj is not None:
	964	upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
	965	upload_date = unified_strdate(upload_date)
	966
	967	m_cat_container = self._search_regex(
	968	r'(?s)<h4[^>]>\sCategory\s</h4>\s<ul[^>]>(.?)</ul>',
	969	video_webpage, 'categories', default=None)
	970	if m_cat_container:
	971	category = self._html_search_regex(
	972	r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
	973	default=None)
	974	video_categories = None if category is None else [category]
	975	else:
	976	video_categories = None
	977
	978	# description
	979	video_description = get_element_by_id("eow-description", video_webpage)
	980	if video_description:
	981	video_description = re.sub(r'''(?x)
	982	<a\s+
	983	(?:[a-zA-Z-]+="[^"]+"\s+)*?
	984	title="([^"]+)"\s+
	985	(?:[a-zA-Z-]+="[^"]+"\s+)*?
	986	class="yt-uix-redirect-link"\s*>
	987	[^<]+
	988	</a>
	989	''', r'\1', video_description)
	990	video_description = clean_html(video_description)
	991	else:
	992	fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
	993	if fd_mobj:
	994	video_description = unescapeHTML(fd_mobj.group(1))
	995	else:
	996	video_description = ''
	997
	998	def _extract_count(count_name):
	999	count = self._search_regex(
	1000	r'id="watch-%s"[^>]>.?([\d,]+)\s*</span>' % re.escape(count_name),
	1001	video_webpage, count_name, default=None)
	1002	if count is not None:
	1003	return int(count.replace(',', ''))
	1004	return None
	1005	like_count = _extract_count('like')
	1006	dislike_count = _extract_count('dislike')
	1007
	1008	# subtitles
	1009	video_subtitles = self.extract_subtitles(video_id, video_webpage)
	1010	automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
	1011
	1012	if 'length_seconds' not in video_info:
	1013	self._downloader.report_warning('unable to extract video duration')
	1014	video_duration = None
	1015	else:
	1016	video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
	1017
	1018	# annotations
	1019	video_annotations = None
	1020	if self._downloader.params.get('writeannotations', False):
	1021	video_annotations = self._extract_annotations(video_id)
	1022
	1023	def _map_to_format_list(urlmap):
	1024	formats = []
	1025	for itag, video_real_url in urlmap.items():
	1026	dct = {
	1027	'format_id': itag,
	1028	'url': video_real_url,
	1029	'player_url': player_url,
	1030	}
	1031	if itag in self._formats:
	1032	dct.update(self._formats[itag])
	1033	formats.append(dct)
	1034	return formats
	1035
	1036	if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
	1037	self.report_rtmp_download()
	1038	formats = [{
	1039	'format_id': '_rtmp',
	1040	'protocol': 'rtmp',
	1041	'url': video_info['conn'][0],
	1042	'player_url': player_url,
	1043	}]
	1044	elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
	1045	encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
	1046	if 'rtmpe%3Dyes' in encoded_url_map:
	1047	raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
	1048	url_map = {}
	1049	for url_data_str in encoded_url_map.split(','):
	1050	url_data = compat_parse_qs(url_data_str)
	1051	if 'itag' not in url_data or 'url' not in url_data:
	1052	continue
	1053	format_id = url_data['itag'][0]
	1054	url = url_data['url'][0]
	1055
	1056	if 'sig' in url_data:
	1057	url += '&signature=' + url_data['sig'][0]
	1058	elif 's' in url_data:
	1059	encrypted_sig = url_data['s'][0]
	1060	ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
	1061
	1062	jsplayer_url_json = self._search_regex(
	1063	ASSETS_RE,
	1064	embed_webpage if age_gate else video_webpage,
	1065	'JS player URL (1)', default=None)
	1066	if not jsplayer_url_json and not age_gate:
	1067	# We need the embed website after all
	1068	if embed_webpage is None:
	1069	embed_url = proto + '://www.youtube.com/embed/%s' % video_id
	1070	embed_webpage = self._download_webpage(
	1071	embed_url, video_id, 'Downloading embed webpage')
	1072	jsplayer_url_json = self._search_regex(
	1073	ASSETS_RE, embed_webpage, 'JS player URL')
	1074
	1075	player_url = json.loads(jsplayer_url_json)
	1076	if player_url is None:
	1077	player_url_json = self._search_regex(
	1078	r'ytplayer\.config.?"url"\s:\s*("[^"]+")',
	1079	video_webpage, 'age gate player URL')
	1080	player_url = json.loads(player_url_json)
	1081
	1082	if self._downloader.params.get('verbose'):
	1083	if player_url is None:
	1084	player_version = 'unknown'
	1085	player_desc = 'unknown'
	1086	else:
	1087	if player_url.endswith('swf'):
	1088	player_version = self._search_regex(
	1089	r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
	1090	'flash player', fatal=False)
	1091	player_desc = 'flash player %s' % player_version
	1092	else:
	1093	player_version = self._search_regex(
	1094	r'html5player-([^/]+?)(?:/html5player)?\.js',
	1095	player_url,
	1096	'html5 player', fatal=False)
	1097	player_desc = 'html5 player %s' % player_version
	1098
	1099	parts_sizes = self._signature_cache_id(encrypted_sig)
	1100	self.to_screen('{%s} signature length %s, %s' %
	1101	(format_id, parts_sizes, player_desc))
	1102
	1103	signature = self._decrypt_signature(
	1104	encrypted_sig, video_id, player_url, age_gate)
	1105	url += '&signature=' + signature
	1106	if 'ratebypass' not in url:
	1107	url += '&ratebypass=yes'
	1108	url_map[format_id] = url
	1109	formats = _map_to_format_list(url_map)
	1110	elif video_info.get('hlsvp'):
	1111	manifest_url = video_info['hlsvp'][0]
	1112	url_map = self._extract_from_m3u8(manifest_url, video_id)
	1113	formats = _map_to_format_list(url_map)
	1114	else:
	1115	raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
	1116
	1117	# Look for the DASH manifest
	1118	if self._downloader.params.get('youtube_include_dash_manifest', True):
	1119	dash_mpd = video_info.get('dashmpd')
	1120	if dash_mpd:
	1121	dash_manifest_url = dash_mpd[0]
	1122	try:
	1123	dash_formats = self._parse_dash_manifest(
	1124	video_id, dash_manifest_url, player_url, age_gate)
	1125	except (ExtractorError, KeyError) as e:
	1126	self.report_warning(
	1127	'Skipping DASH manifest: %r' % e, video_id)
	1128	else:
	1129	# Remove the formats we found through non-DASH, they
	1130	# contain less info and it can be wrong, because we use
	1131	# fixed values (for example the resolution). See
	1132	# https://github.com/rg3/youtube-dl/issues/5774 for an
	1133	# example.
	1134	dash_keys = set(df['format_id'] for df in dash_formats)
	1135	formats = [f for f in formats if f['format_id'] not in dash_keys]
	1136	formats.extend(dash_formats)
	1137
	1138	# Check for malformed aspect ratio
	1139	stretched_m = re.search(
	1140	r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
	1141	video_webpage)
	1142	if stretched_m:
	1143	ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
	1144	for f in formats:
	1145	if f.get('vcodec') != 'none':
	1146	f['stretched_ratio'] = ratio
	1147
	1148	self._sort_formats(formats)
	1149
	1150	return {
	1151	'id': video_id,
	1152	'uploader': video_uploader,
	1153	'uploader_id': video_uploader_id,
	1154	'upload_date': upload_date,
	1155	'title': video_title,
	1156	'thumbnail': video_thumbnail,
	1157	'description': video_description,
	1158	'categories': video_categories,
	1159	'subtitles': video_subtitles,
	1160	'automatic_captions': automatic_captions,
	1161	'duration': video_duration,
	1162	'age_limit': 18 if age_gate else 0,
	1163	'annotations': video_annotations,
	1164	'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
	1165	'view_count': view_count,
	1166	'like_count': like_count,
	1167	'dislike_count': dislike_count,
	1168	'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
	1169	'formats': formats,
	1170	}
	1171
	1172
	1173	class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
	1174	IE_DESC = 'YouTube.com playlists'
	1175	_VALID_URL = r"""(?x)(?:
	1176	(?:https?://)?
	1177	(?:\w+\.)?
	1178	youtube\.com/
	1179	(?:
	1180	(?:course\|view_play_list\|my_playlists\|artist\|playlist\|watch\|embed/videoseries)
	1181	\? (?:.?&)? (?:p\|a\|list)=
	1182	\| p/
	1183	)
	1184	(
	1185	(?:PL\|LL\|EC\|UU\|FL\|RD\|UL)?[0-9A-Za-z-_]{10,}
	1186	# Top tracks, they can also include dots
	1187	\|(?:MC)[\w\.]*
	1188	)
	1189	.*
	1190	\|
	1191	((?:PL\|LL\|EC\|UU\|FL\|RD\|UL)[0-9A-Za-z-_]{10,})
	1192	)"""
	1193	_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
	1194	_VIDEO_RE = r'href="\s/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]?index=(?P<index>\d+)'
	1195	IE_NAME = 'youtube:playlist'
	1196	_TESTS = [{
	1197	'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
	1198	'info_dict': {
	1199	'title': 'ytdl test PL',
	1200	'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
	1201	},
	1202	'playlist_count': 3,
	1203	}, {
	1204	'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
	1205	'info_dict': {
	1206	'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
	1207	'title': 'YDL_Empty_List',
	1208	},
	1209	'playlist_count': 0,
	1210	}, {
	1211	'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
	1212	'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
	1213	'info_dict': {
	1214	'title': '29C3: Not my department',
	1215	'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
	1216	},
	1217	'playlist_count': 95,
	1218	}, {
	1219	'note': 'issue #673',
	1220	'url': 'PLBB231211A4F62143',
	1221	'info_dict': {
	1222	'title': '[OLD]Team Fortress 2 (Class-based LP)',
	1223	'id': 'PLBB231211A4F62143',
	1224	},
	1225	'playlist_mincount': 26,
	1226	}, {
	1227	'note': 'Large playlist',
	1228	'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
	1229	'info_dict': {
	1230	'title': 'Uploads from Cauchemar',
	1231	'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
	1232	},
	1233	'playlist_mincount': 799,
	1234	}, {
	1235	'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
	1236	'info_dict': {
	1237	'title': 'YDL_safe_search',
	1238	'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
	1239	},
	1240	'playlist_count': 2,
	1241	}, {
	1242	'note': 'embedded',
	1243	'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
	1244	'playlist_count': 4,
	1245	'info_dict': {
	1246	'title': 'JODA15',
	1247	'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
	1248	}
	1249	}, {
	1250	'note': 'Embedded SWF player',
	1251	'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
	1252	'playlist_count': 4,
	1253	'info_dict': {
	1254	'title': 'JODA7',
	1255	'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
	1256	}
	1257	}, {
	1258	'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
	1259	'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
	1260	'info_dict': {
	1261	'title': 'Uploads from Interstellar Movie',
	1262	'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
	1263	},
	1264	'playlist_mincout': 21,
	1265	}]
	1266
	1267	def _real_initialize(self):
	1268	self._login()
	1269
	1270	def _extract_mix(self, playlist_id):
	1271	# The mixes are generated from a single video
	1272	# the id of the playlist is just 'RD' + video_id
	1273	url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
	1274	webpage = self._download_webpage(
	1275	url, playlist_id, 'Downloading Youtube mix')
	1276	search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
	1277	title_span = (
	1278	search_title('playlist-title') or
	1279	search_title('title long-title') or
	1280	search_title('title'))
	1281	title = clean_html(title_span)
	1282	ids = orderedSet(re.findall(
	1283	r'''(?xs)data-video-username=".?".?
	1284	href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
	1285	webpage))
	1286	url_results = self._ids_to_results(ids)
	1287
	1288	return self.playlist_result(url_results, playlist_id, title)
	1289
	1290	def _extract_playlist(self, playlist_id):
	1291	url = self._TEMPLATE_URL % playlist_id
	1292	page = self._download_webpage(url, playlist_id)
	1293	more_widget_html = content_html = page
	1294
	1295	for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
	1296	match = match.strip()
	1297	# Check if the playlist exists or is private
	1298	if re.match(r'[^<](The\|This) playlist (does not exist\|is private)[^<]', match):
	1299	raise ExtractorError(
	1300	'The playlist doesn\'t exist or is private, use --username or '
	1301	'--netrc to access it.',
	1302	expected=True)
	1303	elif re.match(r'[^<]Invalid parameters[^<]', match):
	1304	raise ExtractorError(
	1305	'Invalid parameters. Maybe URL is incorrect.',
	1306	expected=True)
	1307	elif re.match(r'[^<]Choose your language[^<]', match):
	1308	continue
	1309	else:
	1310	self.report_warning('Youtube gives an alert message: ' + match)
	1311
	1312	# Extract the video ids from the playlist pages
	1313	ids = []
	1314
	1315	for page_num in itertools.count(1):
	1316	matches = re.finditer(self._VIDEO_RE, content_html)
	1317	# We remove the duplicates and the link with index 0
	1318	# (it's not the first video of the playlist)
	1319	new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
	1320	ids.extend(new_ids)
	1321
	1322	mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
	1323	if not mobj:
	1324	break
	1325
	1326	more = self._download_json(
	1327	'https://youtube.com/%s' % mobj.group('more'), playlist_id,
	1328	'Downloading page #%s' % page_num,
	1329	transform_source=uppercase_escape)
	1330	content_html = more['content_html']
	1331	if not content_html.strip():
	1332	# Some webpages show a "Load more" button but they don't
	1333	# have more videos
	1334	break
	1335	more_widget_html = more['load_more_widget_html']
	1336
	1337	playlist_title = self._html_search_regex(
	1338	r'(?s)<h1 class="pl-header-title[^"]">\s(.?)\s</h1>',
	1339	page, 'title')
	1340
	1341	url_results = self._ids_to_results(ids)
	1342	return self.playlist_result(url_results, playlist_id, playlist_title)
	1343
	1344	def _real_extract(self, url):
	1345	# Extract playlist id
	1346	mobj = re.match(self._VALID_URL, url)
	1347	if mobj is None:
	1348	raise ExtractorError('Invalid URL: %s' % url)
	1349	playlist_id = mobj.group(1) or mobj.group(2)
	1350
	1351	# Check if it's a video-specific URL
	1352	query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
	1353	if 'v' in query_dict:
	1354	video_id = query_dict['v'][0]
	1355	if self._downloader.params.get('noplaylist'):
	1356	self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
	1357	return self.url_result(video_id, 'Youtube', video_id=video_id)
	1358	else:
	1359	self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
	1360
	1361	if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
	1362	# Mixes require a custom extraction process
	1363	return self._extract_mix(playlist_id)
	1364
	1365	return self._extract_playlist(playlist_id)
	1366
	1367
	1368	class YoutubeChannelIE(InfoExtractor):
	1369	IE_DESC = 'YouTube.com channels'
	1370	_VALID_URL = r'https?://(?:youtu\.be\|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
	1371	_TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
	1372	IE_NAME = 'youtube:channel'
	1373	_TESTS = [{
	1374	'note': 'paginated channel',
	1375	'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
	1376	'playlist_mincount': 91,
	1377	'info_dict': {
	1378	'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
	1379	}
	1380	}]
	1381
	1382	@staticmethod
	1383	def extract_videos_from_page(page):
	1384	ids_in_page = []
	1385	titles_in_page = []
	1386	for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
	1387	video_id = mobj.group('id')
	1388	video_title = unescapeHTML(mobj.group('title'))
	1389	try:
	1390	idx = ids_in_page.index(video_id)
	1391	if video_title and not titles_in_page[idx]:
	1392	titles_in_page[idx] = video_title
	1393	except ValueError:
	1394	ids_in_page.append(video_id)
	1395	titles_in_page.append(video_title)
	1396	return zip(ids_in_page, titles_in_page)
	1397
	1398	def _real_extract(self, url):
	1399	channel_id = self._match_id(url)
	1400
	1401	url = self._TEMPLATE_URL % channel_id
	1402
	1403	# Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
	1404	# Workaround by extracting as a playlist if managed to obtain channel playlist URL
	1405	# otherwise fallback on channel by page extraction
	1406	channel_page = self._download_webpage(
	1407	url + '?view=57', channel_id,
	1408	'Downloading channel page', fatal=False)
	1409	channel_playlist_id = self._search_regex(
	1410	[r'<meta itemprop="channelId" content="([^"]+)">',
	1411	r'data-channel-external-id="([^"]+)"'],
	1412	channel_page, 'channel id', default=None)
	1413	if channel_playlist_id and channel_playlist_id.startswith('UC'):
	1414	playlist_id = 'UU' + channel_playlist_id[2:]
	1415	return self.url_result(
	1416	compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
	1417
	1418	channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
	1419	autogenerated = re.search(r'''(?x)
	1420	class="[^"]*?(?:
	1421	channel-header-autogenerated-label\|
	1422	yt-channel-title-autogenerated
	1423	)[^"]*"''', channel_page) is not None
	1424
	1425	if autogenerated:
	1426	# The videos are contained in a single page
	1427	# the ajax pages can't be used, they are empty
	1428	entries = [
	1429	self.url_result(
	1430	video_id, 'Youtube', video_id=video_id,
	1431	video_title=video_title)
	1432	for video_id, video_title in self.extract_videos_from_page(channel_page)]
	1433	return self.playlist_result(entries, channel_id)
	1434
	1435	def _entries():
	1436	more_widget_html = content_html = channel_page
	1437	for pagenum in itertools.count(1):
	1438
	1439	for video_id, video_title in self.extract_videos_from_page(content_html):
	1440	yield self.url_result(
	1441	video_id, 'Youtube', video_id=video_id,
	1442	video_title=video_title)
	1443
	1444	mobj = re.search(
	1445	r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
	1446	more_widget_html)
	1447	if not mobj:
	1448	break
	1449
	1450	more = self._download_json(
	1451	'https://youtube.com/%s' % mobj.group('more'), channel_id,
	1452	'Downloading page #%s' % (pagenum + 1),
	1453	transform_source=uppercase_escape)
	1454	content_html = more['content_html']
	1455	more_widget_html = more['load_more_widget_html']
	1456
	1457	return self.playlist_result(_entries(), channel_id)
	1458
	1459
	1460	class YoutubeUserIE(YoutubeChannelIE):
	1461	IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
	1462	_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link\|watch\|results)(?:$\|[^a-z_A-Z0-9-])))\|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
	1463	_TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
	1464	IE_NAME = 'youtube:user'
	1465
	1466	_TESTS = [{
	1467	'url': 'https://www.youtube.com/user/TheLinuxFoundation',
	1468	'playlist_mincount': 320,
	1469	'info_dict': {
	1470	'title': 'TheLinuxFoundation',
	1471	}
	1472	}, {
	1473	'url': 'ytuser:phihag',
	1474	'only_matching': True,
	1475	}]
	1476
	1477	@classmethod
	1478	def suitable(cls, url):
	1479	# Don't return True if the url can be extracted with other youtube
	1480	# extractor, the regex would is too permissive and it would match.
	1481	other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
	1482	if any(ie.suitable(url) for ie in other_ies):
	1483	return False
	1484	else:
	1485	return super(YoutubeUserIE, cls).suitable(url)
	1486
	1487
	1488	class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
	1489	IE_DESC = 'YouTube.com searches'
	1490	# there doesn't appear to be a real limit, for example if you search for
	1491	# 'python' you get more than 8.000.000 results
	1492	_MAX_RESULTS = float('inf')
	1493	IE_NAME = 'youtube:search'
	1494	_SEARCH_KEY = 'ytsearch'
	1495	_EXTRA_QUERY_ARGS = {}
	1496	_TESTS = []
	1497
	1498	def _get_n_results(self, query, n):
	1499	"""Get a specified number of results for a query"""
	1500
	1501	videos = []
	1502	limit = n
	1503
	1504	for pagenum in itertools.count(1):
	1505	url_query = {
	1506	'search_query': query,
	1507	'page': pagenum,
	1508	'spf': 'navigate',
	1509	}
	1510	url_query.update(self._EXTRA_QUERY_ARGS)
	1511	result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
	1512	data = self._download_json(
	1513	result_url, video_id='query "%s"' % query,
	1514	note='Downloading page %s' % pagenum,
	1515	errnote='Unable to download API page')
	1516	html_content = data[1]['body']['content']
	1517
	1518	if 'class="search-message' in html_content:
	1519	raise ExtractorError(
	1520	'[youtube] No video results', expected=True)
	1521
	1522	new_videos = self._ids_to_results(orderedSet(re.findall(
	1523	r'href="/watch\?v=(.{11})', html_content)))
	1524	videos += new_videos
	1525	if not new_videos or len(videos) > limit:
	1526	break
	1527
	1528	if len(videos) > n:
	1529	videos = videos[:n]
	1530	return self.playlist_result(videos, query)
	1531
	1532
	1533	class YoutubeSearchDateIE(YoutubeSearchIE):
	1534	IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
	1535	_SEARCH_KEY = 'ytsearchdate'
	1536	IE_DESC = 'YouTube.com searches, newest videos first'
	1537	_EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
	1538
	1539
	1540	class YoutubeSearchURLIE(InfoExtractor):
	1541	IE_DESC = 'YouTube.com search URLs'
	1542	IE_NAME = 'youtube:search_url'
	1543	_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]\|$)'
	1544	_TESTS = [{
	1545	'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
	1546	'playlist_mincount': 5,
	1547	'info_dict': {
	1548	'title': 'youtube-dl test video',
	1549	}
	1550	}]
	1551
	1552	def _real_extract(self, url):
	1553	mobj = re.match(self._VALID_URL, url)
	1554	query = compat_urllib_parse.unquote_plus(mobj.group('query'))
	1555
	1556	webpage = self._download_webpage(url, query)
	1557	result_code = self._search_regex(
	1558	r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
	1559
	1560	part_codes = re.findall(
	1561	r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
	1562	entries = []
	1563	for part_code in part_codes:
	1564	part_title = self._html_search_regex(
	1565	[r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
	1566	part_url_snippet = self._html_search_regex(
	1567	r'(?s)href="([^"]+)"', part_code, 'item URL')
	1568	part_url = compat_urlparse.urljoin(
	1569	'https://www.youtube.com/', part_url_snippet)
	1570	entries.append({
	1571	'_type': 'url',
	1572	'url': part_url,
	1573	'title': part_title,
	1574	})
	1575
	1576	return {
	1577	'_type': 'playlist',
	1578	'entries': entries,
	1579	'title': query,
	1580	}
	1581
	1582
	1583	class YoutubeShowIE(InfoExtractor):
	1584	IE_DESC = 'YouTube.com (multi-season) shows'
	1585	_VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
	1586	IE_NAME = 'youtube:show'
	1587	_TESTS = [{
	1588	'url': 'http://www.youtube.com/show/airdisasters',
	1589	'playlist_mincount': 3,
	1590	'info_dict': {
	1591	'id': 'airdisasters',
	1592	'title': 'Air Disasters',
	1593	}
	1594	}]
	1595
	1596	def _real_extract(self, url):
	1597	mobj = re.match(self._VALID_URL, url)
	1598	playlist_id = mobj.group('id')
	1599	webpage = self._download_webpage(
	1600	url, playlist_id, 'Downloading show webpage')
	1601	# There's one playlist for each season of the show
	1602	m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
	1603	self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
	1604	entries = [
	1605	self.url_result(
	1606	'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
	1607	for season in m_seasons
	1608	]
	1609	title = self._og_search_title(webpage, fatal=False)
	1610
	1611	return {
	1612	'_type': 'playlist',
	1613	'id': playlist_id,
	1614	'title': title,
	1615	'entries': entries,
	1616	}
	1617
	1618
	1619	class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
	1620	"""
	1621	Base class for feed extractors
	1622	Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
	1623	"""
	1624	_LOGIN_REQUIRED = True
	1625
	1626	@property
	1627	def IE_NAME(self):
	1628	return 'youtube:%s' % self._FEED_NAME
	1629
	1630	def _real_initialize(self):
	1631	self._login()
	1632
	1633	def _real_extract(self, url):
	1634	page = self._download_webpage(
	1635	'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)
	1636
	1637	# The extraction process is the same as for playlists, but the regex
	1638	# for the video ids doesn't contain an index
	1639	ids = []
	1640	more_widget_html = content_html = page
	1641	for page_num in itertools.count(1):
	1642	matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
	1643
	1644	# 'recommended' feed has infinite 'load more' and each new portion spins
	1645	# the same videos in (sometimes) slightly different order, so we'll check
	1646	# for unicity and break when portion has no new videos
	1647	new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))
	1648	if not new_ids:
	1649	break
	1650
	1651	ids.extend(new_ids)
	1652
	1653	mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
	1654	if not mobj:
	1655	break
	1656
	1657	more = self._download_json(
	1658	'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
	1659	'Downloading page #%s' % page_num,
	1660	transform_source=uppercase_escape)
	1661	content_html = more['content_html']
	1662	more_widget_html = more['load_more_widget_html']
	1663
	1664	return self.playlist_result(
	1665	self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
	1666
	1667
	1668	class YoutubeWatchLaterIE(YoutubePlaylistIE):
	1669	IE_NAME = 'youtube:watchlater'
	1670	IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
	1671	_VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later\|playlist\?list=WL)\|:ytwatchlater'
	1672
	1673	_TESTS = [] # override PlaylistIE tests
	1674
	1675	def _real_extract(self, url):
	1676	return self._extract_playlist('WL')
	1677
	1678
	1679	class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
	1680	IE_NAME = 'youtube:favorites'
	1681	IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
	1682	_VALID_URL = r'https?://www\.youtube\.com/my_favorites\|:ytfav(?:ou?rites)?'
	1683	_LOGIN_REQUIRED = True
	1684
	1685	def _real_extract(self, url):
	1686	webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
	1687	playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
	1688	return self.url_result(playlist_id, 'YoutubePlaylist')
	1689
	1690
	1691	class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
	1692	IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
	1693	_VALID_URL = r'https?://www\.youtube\.com/feed/recommended\|:ytrec(?:ommended)?'
	1694	_FEED_NAME = 'recommended'
	1695	_PLAYLIST_TITLE = 'Youtube Recommended videos'
	1696
	1697
	1698	class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
	1699	IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
	1700	_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions\|:ytsubs(?:criptions)?'
	1701	_FEED_NAME = 'subscriptions'
	1702	_PLAYLIST_TITLE = 'Youtube Subscriptions'
	1703
	1704
	1705	class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
	1706	IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
	1707	_VALID_URL = 'https?://www\.youtube\.com/feed/history\|:ythistory'
	1708	_FEED_NAME = 'history'
	1709	_PLAYLIST_TITLE = 'Youtube History'
	1710
	1711
	1712	class YoutubeTruncatedURLIE(InfoExtractor):
	1713	IE_NAME = 'youtube:truncated_url'
	1714	IE_DESC = False # Do not list
	1715	_VALID_URL = r'''(?x)
	1716	(?:https?://)?
	1717	(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
	1718	(?:watch\?(?:
	1719	feature=[a-z_]+\|
	1720	annotation_id=annotation_[^&]+\|
	1721	x-yt-cl=[0-9]+\|
	1722	hl=[^&]*\|
	1723	)?
	1724	\|
	1725	attribution_link\?a=[^&]+
	1726	)
	1727	$
	1728	'''
	1729
	1730	_TESTS = [{
	1731	'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
	1732	'only_matching': True,
	1733	}, {
	1734	'url': 'http://www.youtube.com/watch?',
	1735	'only_matching': True,
	1736	}, {
	1737	'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
	1738	'only_matching': True,
	1739	}, {
	1740	'url': 'https://www.youtube.com/watch?feature=foo',
	1741	'only_matching': True,
	1742	}, {
	1743	'url': 'https://www.youtube.com/watch?hl=en-GB',
	1744	'only_matching': True,
	1745	}]
	1746
	1747	def _real_extract(self, url):
	1748	raise ExtractorError(
	1749	'Did you forget to quote the URL? Remember that & is a meta '
	1750	'character in most shells, so you want to put the URL in quotes, '
	1751	'like youtube-dl '
	1752	'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
	1753	' or simply youtube-dl BaW_jenozKc .',
	1754	expected=True)
	1755
	1756
	1757	class YoutubeTruncatedIDIE(InfoExtractor):
	1758	IE_NAME = 'youtube:truncated_id'
	1759	IE_DESC = False # Do not list
	1760	_VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
	1761
	1762	_TESTS = [{
	1763	'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
	1764	'only_matching': True,
	1765	}]
	1766
	1767	def _real_extract(self, url):
	1768	video_id = self._match_id(url)
	1769	raise ExtractorError(
	1770	'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
	1771	expected=True)