jfr.im git - yt-dlp.git/blame_incremental - youtube

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5
	6	import itertools
	7	import json
	8	import os.path
	9	import re
	10	import time
	11	import traceback
	12
	13	from .common import InfoExtractor, SearchInfoExtractor
	14	from ..jsinterp import JSInterpreter
	15	from ..swfinterp import SWFInterpreter
	16	from ..compat import (
	17	compat_chr,
	18	compat_parse_qs,
	19	compat_urllib_parse,
	20	compat_urllib_parse_unquote,
	21	compat_urllib_parse_unquote_plus,
	22	compat_urllib_parse_urlparse,
	23	compat_urlparse,
	24	compat_str,
	25	)
	26	from ..utils import (
	27	clean_html,
	28	encode_dict,
	29	error_to_compat_str,
	30	ExtractorError,
	31	float_or_none,
	32	get_element_by_attribute,
	33	get_element_by_id,
	34	int_or_none,
	35	mimetype2ext,
	36	orderedSet,
	37	parse_duration,
	38	remove_quotes,
	39	remove_start,
	40	sanitized_Request,
	41	smuggle_url,
	42	str_to_int,
	43	unescapeHTML,
	44	unified_strdate,
	45	unsmuggle_url,
	46	uppercase_escape,
	47	ISO3166Utils,
	48	)
	49
	50
	51	class YoutubeBaseInfoExtractor(InfoExtractor):
	52	"""Provide base functions for Youtube extractors"""
	53	_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
	54	_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
	55	_NETRC_MACHINE = 'youtube'
	56	# If True it will raise an error if no login info is provided
	57	_LOGIN_REQUIRED = False
	58
	59	def _set_language(self):
	60	self._set_cookie(
	61	'.youtube.com', 'PREF', 'f1=50000000&hl=en',
	62	# YouTube sets the expire time to about two months
	63	expire_time=time.time() + 2 * 30 * 24 * 3600)
	64
	65	def _ids_to_results(self, ids):
	66	return [
	67	self.url_result(vid_id, 'Youtube', video_id=vid_id)
	68	for vid_id in ids]
	69
	70	def _login(self):
	71	"""
	72	Attempt to log in to YouTube.
	73	True is returned if successful or skipped.
	74	False is returned if login failed.
	75
	76	If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
	77	"""
	78	(username, password) = self._get_login_info()
	79	# No authentication to be performed
	80	if username is None:
	81	if self._LOGIN_REQUIRED:
	82	raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
	83	return True
	84
	85	login_page = self._download_webpage(
	86	self._LOGIN_URL, None,
	87	note='Downloading login page',
	88	errnote='unable to fetch login page', fatal=False)
	89	if login_page is False:
	90	return
	91
	92	galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
	93	login_page, 'Login GALX parameter')
	94
	95	# Log in
	96	login_form_strs = {
	97	'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
	98	'Email': username,
	99	'GALX': galx,
	100	'Passwd': password,
	101
	102	'PersistentCookie': 'yes',
	103	'_utf8': '霱',
	104	'bgresponse': 'js_disabled',
	105	'checkConnection': '',
	106	'checkedDomains': 'youtube',
	107	'dnConn': '',
	108	'pstMsg': '0',
	109	'rmShown': '1',
	110	'secTok': '',
	111	'signIn': 'Sign in',
	112	'timeStmp': '',
	113	'service': 'youtube',
	114	'uilel': '3',
	115	'hl': 'en_US',
	116	}
	117
	118	login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')
	119
	120	req = sanitized_Request(self._LOGIN_URL, login_data)
	121	login_results = self._download_webpage(
	122	req, None,
	123	note='Logging in', errnote='unable to log in', fatal=False)
	124	if login_results is False:
	125	return False
	126
	127	if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
	128	raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
	129
	130	# Two-Factor
	131	# TODO add SMS and phone call support - these require making a request and then prompting the user
	132
	133	if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
	134	tfa_code = self._get_tfa_info('2-step verification code')
	135
	136	if not tfa_code:
	137	self._downloader.report_warning(
	138	'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
	139	'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
	140	return False
	141
	142	tfa_code = remove_start(tfa_code, 'G-')
	143
	144	tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
	145
	146	tfa_form_strs.update({
	147	'Pin': tfa_code,
	148	'TrustDevice': 'on',
	149	})
	150
	151	tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')
	152
	153	tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
	154	tfa_results = self._download_webpage(
	155	tfa_req, None,
	156	note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
	157
	158	if tfa_results is False:
	159	return False
	160
	161	if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
	162	self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
	163	return False
	164	if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
	165	self._downloader.report_warning('unable to log in - did the page structure change?')
	166	return False
	167	if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
	168	self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
	169	return False
	170
	171	if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
	172	self._downloader.report_warning('unable to log in: bad username or password')
	173	return False
	174	return True
	175
	176	def _real_initialize(self):
	177	if self._downloader is None:
	178	return
	179	self._set_language()
	180	if not self._login():
	181	return
	182
	183
	184	class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
	185	# Extract entries from page with "Load more" button
	186	def _entries(self, page, playlist_id):
	187	more_widget_html = content_html = page
	188	for page_num in itertools.count(1):
	189	for entry in self._process_page(content_html):
	190	yield entry
	191
	192	mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
	193	if not mobj:
	194	break
	195
	196	more = self._download_json(
	197	'https://youtube.com/%s' % mobj.group('more'), playlist_id,
	198	'Downloading page #%s' % page_num,
	199	transform_source=uppercase_escape)
	200	content_html = more['content_html']
	201	if not content_html.strip():
	202	# Some webpages show a "Load more" button but they don't
	203	# have more videos
	204	break
	205	more_widget_html = more['load_more_widget_html']
	206
	207
	208	class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
	209	def _process_page(self, content):
	210	for video_id, video_title in self.extract_videos_from_page(content):
	211	yield self.url_result(video_id, 'Youtube', video_id, video_title)
	212
	213	def extract_videos_from_page(self, page):
	214	ids_in_page = []
	215	titles_in_page = []
	216	for mobj in re.finditer(self._VIDEO_RE, page):
	217	# The link with index 0 is not the first video of the playlist (not sure if still actual)
	218	if 'index' in mobj.groupdict() and mobj.group('id') == '0':
	219	continue
	220	video_id = mobj.group('id')
	221	video_title = unescapeHTML(mobj.group('title'))
	222	if video_title:
	223	video_title = video_title.strip()
	224	try:
	225	idx = ids_in_page.index(video_id)
	226	if video_title and not titles_in_page[idx]:
	227	titles_in_page[idx] = video_title
	228	except ValueError:
	229	ids_in_page.append(video_id)
	230	titles_in_page.append(video_title)
	231	return zip(ids_in_page, titles_in_page)
	232
	233
	234	class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
	235	def _process_page(self, content):
	236	for playlist_id in orderedSet(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)):
	237	yield self.url_result(
	238	'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
	239
	240	def _real_extract(self, url):
	241	playlist_id = self._match_id(url)
	242	webpage = self._download_webpage(url, playlist_id)
	243	title = self._og_search_title(webpage, fatal=False)
	244	return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
	245
	246
	247	class YoutubeIE(YoutubeBaseInfoExtractor):
	248	IE_DESC = 'YouTube.com'
	249	_VALID_URL = r"""(?x)^
	250	(
	251	(?:https?://\|//) # http(s):// or protocol-independent URL
	252	(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/\|
	253	(?:www\.)?deturl\.com/www\.youtube\.com/\|
	254	(?:www\.)?pwnyoutube\.com/\|
	255	(?:www\.)?yourepeat\.com/\|
	256	tube\.majestyc\.net/\|
	257	youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
	258	(?:.*?\#/)? # handle anchor (#/) redirect urls
	259	(?: # the various things that can precede the ID:
	260	(?:(?:v\|embed\|e)/(?!videoseries)) # v/ or embed/ or e/
	261	\|(?: # or the v= param in all its forms
	262	(?:(?:watch\|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup\|.php) or nothing (like /?v=xxxx)
	263	(?:\?\|\#!?) # the params delimiter ? or # or #!
	264	(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
	265	v=
	266	)
	267	))
	268	\|(?:
	269	youtu\.be\| # just youtu.be/xxxx
	270	vid\.plus # or vid.plus/xxxx
	271	)/
	272	\|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
	273	)
	274	)? # all until now is optional -> you can pass the naked ID
	275	([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
	276	(?!.*?&list=) # combined list/video URLs are handled by the playlist IE
	277	(?(1).+)? # if we found the ID, everything can follow
	278	$"""
	279	_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
	280	_formats = {
	281	'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
	282	'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
	283	'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
	284	'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
	285	'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
	286	'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
	287	'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	288	'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	289	# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
	290	'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
	291	'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
	292	'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
	293	'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
	294	'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
	295	'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
	296	'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
	297	'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	298	'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	299
	300
	301	# 3D videos
	302	'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
	303	'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
	304	'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
	305	'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
	306	'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
	307	'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
	308	'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
	309
	310	# Apple HTTP Live Streaming
	311	'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
	312	'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
	313	'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
	314	'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
	315	'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
	316	'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
	317	'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
	318
	319	# DASH mp4 video
	320	'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	321	'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	322	'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	323	'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	324	'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	325	'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
	326	'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	327	'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	328	'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
	329	'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
	330	'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	331
	332	# Dash mp4 audio
	333	'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
	334	'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
	335	'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
	336
	337	# Dash webm
	338	'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	339	'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	340	'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	341	'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	342	'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	343	'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	344	'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
	345	'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	346	'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	347	'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	348	'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	349	'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	350	'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	351	'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	352	'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	353	# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
	354	'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	355	'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
	356	'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
	357	'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
	358	'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	359	'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
	360
	361	# Dash webm audio
	362	'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
	363	'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
	364
	365	# Dash webm audio with opus inside
	366	'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
	367	'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
	368	'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
	369
	370	# RTMP (unnamed)
	371	'_rtmp': {'protocol': 'rtmp'},
	372	}
	373	_SUBTITLE_FORMATS = ('ttml', 'vtt')
	374
	375	IE_NAME = 'youtube'
	376	_TESTS = [
	377	{
	378	'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
	379	'info_dict': {
	380	'id': 'BaW_jenozKc',
	381	'ext': 'mp4',
	382	'title': 'youtube-dl test video "\'/\\ä↭𝕐',
	383	'uploader': 'Philipp Hagemeister',
	384	'uploader_id': 'phihag',
	385	'upload_date': '20121002',
	386	'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
	387	'categories': ['Science & Technology'],
	388	'tags': ['youtube-dl'],
	389	'like_count': int,
	390	'dislike_count': int,
	391	'start_time': 1,
	392	'end_time': 9,
	393	}
	394	},
	395	{
	396	'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
	397	'note': 'Test generic use_cipher_signature video (#897)',
	398	'info_dict': {
	399	'id': 'UxxajLWwzqY',
	400	'ext': 'mp4',
	401	'upload_date': '20120506',
	402	'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
	403	'alt_title': 'I Love It (feat. Charli XCX)',
	404	'description': 'md5:782e8651347686cba06e58f71ab51773',
	405	'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
	406	'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
	407	'iconic ep', 'iconic', 'love', 'it'],
	408	'uploader': 'Icona Pop',
	409	'uploader_id': 'IconaPop',
	410	'creator': 'Icona Pop',
	411	}
	412	},
	413	{
	414	'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
	415	'note': 'Test VEVO video with age protection (#956)',
	416	'info_dict': {
	417	'id': '07FYdnEawAQ',
	418	'ext': 'mp4',
	419	'upload_date': '20130703',
	420	'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
	421	'alt_title': 'Tunnel Vision',
	422	'description': 'md5:64249768eec3bc4276236606ea996373',
	423	'uploader': 'justintimberlakeVEVO',
	424	'uploader_id': 'justintimberlakeVEVO',
	425	'creator': 'Justin Timberlake',
	426	'age_limit': 18,
	427	}
	428	},
	429	{
	430	'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
	431	'note': 'Embed-only video (#1746)',
	432	'info_dict': {
	433	'id': 'yZIXLfi8CZQ',
	434	'ext': 'mp4',
	435	'upload_date': '20120608',
	436	'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
	437	'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
	438	'uploader': 'SET India',
	439	'uploader_id': 'setindia',
	440	'age_limit': 18,
	441	}
	442	},
	443	{
	444	'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
	445	'note': 'Use the first video ID in the URL',
	446	'info_dict': {
	447	'id': 'BaW_jenozKc',
	448	'ext': 'mp4',
	449	'title': 'youtube-dl test video "\'/\\ä↭𝕐',
	450	'uploader': 'Philipp Hagemeister',
	451	'uploader_id': 'phihag',
	452	'upload_date': '20121002',
	453	'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
	454	'categories': ['Science & Technology'],
	455	'tags': ['youtube-dl'],
	456	'like_count': int,
	457	'dislike_count': int,
	458	},
	459	'params': {
	460	'skip_download': True,
	461	},
	462	},
	463	{
	464	'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
	465	'note': '256k DASH audio (format 141) via DASH manifest',
	466	'info_dict': {
	467	'id': 'a9LDPn-MO4I',
	468	'ext': 'm4a',
	469	'upload_date': '20121002',
	470	'uploader_id': '8KVIDEO',
	471	'description': '',
	472	'uploader': '8KVIDEO',
	473	'title': 'UHDTV TEST 8K VIDEO.mp4'
	474	},
	475	'params': {
	476	'youtube_include_dash_manifest': True,
	477	'format': '141',
	478	},
	479	},
	480	# DASH manifest with encrypted signature
	481	{
	482	'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
	483	'info_dict': {
	484	'id': 'IB3lcPjvWLA',
	485	'ext': 'm4a',
	486	'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
	487	'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
	488	'uploader': 'AfrojackVEVO',
	489	'uploader_id': 'AfrojackVEVO',
	490	'upload_date': '20131011',
	491	},
	492	'params': {
	493	'youtube_include_dash_manifest': True,
	494	'format': '141',
	495	},
	496	},
	497	# JS player signature function name containing $
	498	{
	499	'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
	500	'info_dict': {

1

# coding: utf-8

2

3

from __future__ import unicode_literals

import itertools

import json

import os.path

import re

import time

import traceback

from .common import InfoExtractor, SearchInfoExtractor

14

from ..jsinterp import JSInterpreter

15

from ..swfinterp import SWFInterpreter

16

from ..compat import (

compat_chr,

compat_parse_qs,

compat_urllib_parse,

compat_urllib_parse_unquote,

21

compat_urllib_parse_unquote_plus,

22

compat_urllib_parse_urlparse,

compat_urlparse,

compat_str,

)

from ..utils import (

clean_html,

encode_dict,

error_to_compat_str,

ExtractorError,

float_or_none,

get_element_by_attribute,

get_element_by_id,

int_or_none,

mimetype2ext,

orderedSet,

parse_duration,

remove_quotes,

remove_start,

sanitized_Request,

smuggle_url,

str_to_int,

unescapeHTML,

unified_strdate,

unsmuggle_url,

uppercase_escape,

ISO3166Utils,

)

class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Set the ``PREF`` cookie (its value includes ``hl=en``) for .youtube.com."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one ``url_result`` (extractor key 'Youtube') per id in ``ids``."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

        The steps are: fetch the login page, read its GALX form value, post
        the credentials and, when the response contains the "challenge" form,
        post a two-factor code as well.

        Raises ExtractorError when no credentials are available but
        _LOGIN_REQUIRED is set, or when the login response contains the
        ``errormsg_0_Passwd`` marker.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to honour the documented
            # contract; both values are falsy, so existing callers are unaffected.
            return False

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, 'Login GALX parameter')

        # Log in
        login_form_strs = {
            'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            'Email': username,
            'GALX': galx,
            'Passwd': password,

            'PersistentCookie': 'yes',
            '_utf8': '霱',
            'bgresponse': 'js_disabled',
            'checkConnection': '',
            'checkedDomains': 'youtube',
            'dnConn': '',
            'pstMsg': '0',
            'rmShown': '1',
            'secTok': '',
            'signIn': 'Sign in',
            'timeStmp': '',
            'service': 'youtube',
            'uilel': '3',
            'hl': 'en_US',
        }

        login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')

        req = sanitized_Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note='Logging in', errnote='unable to log in', fatal=False)
        if login_results is False:
            return False

        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

        # Two-Factor
        # TODO add SMS and phone call support - these require making a request and then prompting the user

        if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                # The first literal ends with a space so the two adjacent
                # literals do not run together as "<code>(Note ...".
                self._downloader.report_warning(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            # A leading 'G-' on the code is dropped before it is submitted.
            tfa_code = remove_start(tfa_code, 'G-')

            # Start from the hidden inputs of the challenge form and add the code.
            tfa_form_strs = self._form_hidden_inputs('challenge', login_results)

            tfa_form_strs.update({
                'Pin': tfa_code,
                'TrustDevice': 'on',
            })

            tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')

            tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                tfa_req, None,
                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)

            if tfa_results is False:
                return False

            # The challenge form being served again means the code was rejected.
            if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
                self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
                return False
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
                self._downloader.report_warning('unable to log in - did the page structure change?')
                return False
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                return False

        # The login form being present in the response to the credentials
        # post is treated as a rejected username/password.
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; do nothing without a downloader."""
        if self._downloader is None:
            return
        self._set_language()
        # The result is intentionally ignored: the previous code returned
        # immediately after this call whether or not the login succeeded.
        self._login()

class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of ``page`` and of every "Load more" page that follows it.

        Each chunk of HTML is handed to ``self._process_page``. The next
        chunk is fetched as JSON from the ``data-uix-load-more-href`` link
        found in the current widget HTML. Iteration ends when no such link
        is present or when the fetched ``content_html`` is blank.
        """
        load_more_re = r'data-uix-load-more-href="/?(?P<more>[^"]+)"'
        widget_html = page
        html_chunk = page
        page_num = 0
        while True:
            page_num += 1
            for item in self._process_page(html_chunk):
                yield item

            link = re.search(load_more_re, widget_html)
            if link is None:
                return

            next_url = 'https://youtube.com/%s' % link.group('more')
            note = 'Downloading page #%s' % page_num
            response = self._download_json(
                next_url, playlist_id, note,
                transform_source=uppercase_escape)

            html_chunk = response['content_html']
            if not html_chunk.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = response['load_more_widget_html']

206

207

208

class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    # Page-processing helpers for listings of videos. Relies on a _VIDEO_RE
    # attribute that is not defined in this class.

    def _process_page(self, content):
        """Yield a 'Youtube' ``url_result`` for every (id, title) pair found in ``content``."""
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page(self, page):
        """Collect (video id, title) pairs matched by ``self._VIDEO_RE`` in ``page``.

        Every id is reported once, in order of first appearance. When an id
        matches again and the stored title is still empty, the first
        non-empty title seen for it is stored instead.

        Returns ``zip(ids, titles)`` over the two collected lists.
        """
        ids_in_page = []
        titles_in_page = []
        # Maps each id to its position in the two lists above, so a repeated
        # id is located without rescanning ids_in_page for every match.
        position_of = {}
        for mobj in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): the test compares the 'id' group, not 'index', with '0' - confirm intent.
            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                continue
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
            if video_title:
                video_title = video_title.strip()
            idx = position_of.get(video_id)
            if idx is None:
                position_of[video_id] = len(ids_in_page)
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
            elif video_title and not titles_in_page[idx]:
                titles_in_page[idx] = video_title
        return zip(ids_in_page, titles_in_page)

232

233

234

class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' ``url_result`` for the playlist links found in ``content``.

        The matched ids are passed through ``orderedSet`` before use
        (presumably to drop repeats while keeping order - confirm in utils).
        """
        found_ids = re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)
        for list_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % list_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download ``url`` and return a playlist result built from its paged entries."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # A missing title is tolerated (fatal=False).
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)

245

246

247

class YoutubeIE(YoutubeBaseInfoExtractor):

248

IE_DESC = 'YouTube.com'

249

_VALID_URL = r"""(?x)^

250

(

251

(?:https?://|//) # http(s):// or protocol-independent URL

252

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|

253

(?:www\.)?deturl\.com/www\.youtube\.com/|

254

(?:www\.)?pwnyoutube\.com/|

255

(?:www\.)?yourepeat\.com/|

256

tube\.majestyc\.net/|

257

youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains

258

(?:.*?\#/)? # handle anchor (#/) redirect urls

259

(?: # the various things that can precede the ID:

260

(?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/

261

|(?: # or the v= param in all its forms

262

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

263

(?:\?|\#!?) # the params delimiter ? or # or #!

264

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

270

vid\.plus # or vid.plus/xxxx

271

)/

272

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

273

)

274

)? # all until now is optional -> you can pass the naked ID

275

([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

276

(?!.*?&list=) # combined list/video URLs are handled by the playlist IE

277

(?(1).+)? # if we found the ID, everything can follow

278

$"""

279

_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'

280

_formats = {

281

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

282

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

283

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

284

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

285

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

286

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

287

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

288

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

289

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

290

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

291

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

292

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

293

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

294

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

295

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

296

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

297

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

298

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

303

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

304

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

305

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

306

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

307

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

308

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

309

310

# Apple HTTP Live Streaming

311

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

312

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

313

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

314

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

315

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

316

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

317

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

318

319

# DASH mp4 video

320

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

321

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

322

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

323

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

324

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

325

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)

326

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

327

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

328

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},

329

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},

330

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

331

332

# Dash mp4 audio

333

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},

334

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},

335

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},

336

337

# Dash webm

338

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

339

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

340

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

341

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

342

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

343

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

344

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},

345

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

346

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

347

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

348

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

349

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

350

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

351

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

352

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

353

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

354

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

355

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},

356

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},

357

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},

358

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

359

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},

360

361

# Dash webm audio

362

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},

363

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},

364

365

# Dash webm audio with opus inside

366

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},

367

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},

368

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},

369

370

# RTMP (unnamed)

371

'_rtmp': {'protocol': 'rtmp'},

372

}

373

_SUBTITLE_FORMATS = ('ttml', 'vtt')

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

383

'uploader': 'Philipp Hagemeister',

384

'uploader_id': 'phihag',

385

'upload_date': '20121002',

386

'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',

387

'categories': ['Science & Technology'],

388

'tags': ['youtube-dl'],

389

'like_count': int,

390

'dislike_count': int,

'start_time': 1,

'end_time': 9,

}

},

{

'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',

397

'note': 'Test generic use_cipher_signature video (#897)',

'info_dict': {

'id': 'UxxajLWwzqY',

'ext': 'mp4',

'upload_date': '20120506',

402

'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',

403

'alt_title': 'I Love It (feat. Charli XCX)',

404

'description': 'md5:782e8651347686cba06e58f71ab51773',

405

'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',

406

'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',

407

'iconic ep', 'iconic', 'love', 'it'],

408

'uploader': 'Icona Pop',

409

'uploader_id': 'IconaPop',

410

'creator': 'Icona Pop',

}

},

{

'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',

415

'note': 'Test VEVO video with age protection (#956)',

'info_dict': {

'id': '07FYdnEawAQ',

'ext': 'mp4',

'upload_date': '20130703',

420

'title': 'Justin Timberlake - Tunnel Vision (Explicit)',

421

'alt_title': 'Tunnel Vision',

422

'description': 'md5:64249768eec3bc4276236606ea996373',

423

'uploader': 'justintimberlakeVEVO',

424

'uploader_id': 'justintimberlakeVEVO',

425

'creator': 'Justin Timberlake',

'age_limit': 18,

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

431

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

436

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

437

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

438

'uploader': 'SET India',

439

'uploader_id': 'setindia',

'age_limit': 18,

}

},

{

'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',

445

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

450

'uploader': 'Philipp Hagemeister',

451

'uploader_id': 'phihag',

452

'upload_date': '20121002',

453

'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',

454

'categories': ['Science & Technology'],

455

'tags': ['youtube-dl'],

456

'like_count': int,

457

'dislike_count': int,

458

},

459

'params': {

460

'skip_download': True,

},

},

{

'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',

465

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

470

'uploader_id': '8KVIDEO',

471

'description': '',

472

'uploader': '8KVIDEO',

473

'title': 'UHDTV TEST 8K VIDEO.mp4'

474

},

475

'params': {

476

'youtube_include_dash_manifest': True,

'format': '141',

},

},

# DASH manifest with encrypted signature

481

{

482

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',

487

'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',

488

'uploader': 'AfrojackVEVO',

489

'uploader_id': 'AfrojackVEVO',

490

'upload_date': '20131011',

491

},

492

'params': {

493

'youtube_include_dash_manifest': True,

'format': '141',

},

},

# JS player signature function name containing $

498

{

499

'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',

'info_dict': {

'id': 'nfWlot6h_JM',

'ext': 'm4a',

'title': 'Taylor Swift - Shake It Off',

504

'alt_title': 'Shake It Off',

505

'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',

506

'uploader': 'TaylorSwiftVEVO',

507

'uploader_id': 'TaylorSwiftVEVO',

508

'upload_date': '20140818',

509

'creator': 'Taylor Swift',

510

},

511

'params': {

512

'youtube_include_dash_manifest': True,

'format': '141',

},

},

# Controversy video

{

'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',

'info_dict': {

'id': 'T4XJQO3qol8',

'ext': 'mp4',

'upload_date': '20100909',

523

'uploader': 'The Amazing Atheist',

524

'uploader_id': 'TheAmazingAtheist',

525

'title': 'Burning Everyone\'s Koran',

526

'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',

527

}

528

},

529

# Normal age-gate video (No vevo, embed allowed)

530

{

531

'url': 'http://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

536

'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

537

'uploader': 'The Witcher',

538

'uploader_id': 'WitcherGame',

539

'upload_date': '20140605',

'age_limit': 18,

},

},

# Age-gate video with encrypted signature

544

{

545

'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',

'info_dict': {

'id': '6kLq3WMV1nU',

'ext': 'mp4',

'title': 'Dedication To My Ex (Miss That) (Lyric Video)',

550

'description': 'md5:33765bb339e1b47e7e72b5490139bb41',

551

'uploader': 'LloydVEVO',

552

'uploader_id': 'LloydVEVO',

553

'upload_date': '20110629',

'age_limit': 18,

},

},

# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)

558

{

559

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'upload_date': '20100430',

564

'uploader_id': 'deadmau5',

565

'creator': 'deadmau5',

566

'description': 'md5:12c56784b8032162bb936a5f76d55360',

567

'uploader': 'deadmau5',

568

'title': 'Deadmau5 - Some Chords (HD)',

569

'alt_title': 'Some Chords',

570

},

571

'expected_warnings': [

572

'DASH manifest missing',

573

]

574

},

575

# Olympics (https://github.com/rg3/youtube-dl/issues/4431)

576

{

577

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'upload_date': '20150827',

582

'uploader_id': 'olympic',

583

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

584

'uploader': 'Olympics',

585

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

586

},

587

'params': {

588

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

598

'upload_date': '20110310',

599

'uploader_id': 'AllenMeow',

600

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

601

'uploader': '孫艾倫',

602

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

603

},

604

},

605

# url_encoded_fmt_stream_map is empty string

606

{

607

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

612

'description': '',

613

'upload_date': '20150404',

614

'uploader_id': 'spbelect',

615

'uploader': 'Наблюдатели Петербурга',

616

},

617

'params': {

618

'skip_download': 'requires avconv',

619

},

620

'skip': 'This live event has ended.',

621

},

622

# Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)

623

{

624

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'mp4',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

629

'description': 'md5:116377fd2963b81ec4ce64b542173306',

630

'upload_date': '20150625',

631

'uploader_id': 'dorappi2000',

632

'uploader': 'dorappi2000',

633

'formats': 'mincount:33',

634

},

635

},

636

# DASH manifest with segment_list

637

{

638

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

639

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

644

'uploader': 'Airtek',

645

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

646

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

647

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

648

},

649

'params': {

650

'youtube_include_dash_manifest': True,

651

'format': '135', # bestvideo

}

},

{

# Multifeed videos (multiple cameras), URL is for Main Camera

656

'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',

657

'info_dict': {

658

'id': 'jqWvoWXjCVs',

659

'title': 'teamPGP: Rocket League Noob Stream',

660

'description': 'md5:dc7872fb300e143831327f1bae3af010',

},

'playlist': [{

'info_dict': {

'id': 'jqWvoWXjCVs',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',

667

'description': 'md5:dc7872fb300e143831327f1bae3af010',

668

'upload_date': '20150721',

669

'uploader': 'Beer Games Beer',

670

'uploader_id': 'beergamesbeer',

},

}, {

'info_dict': {

'id': '6h8e8xoXJzg',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',

677

'description': 'md5:dc7872fb300e143831327f1bae3af010',

678

'upload_date': '20150721',

679

'uploader': 'Beer Games Beer',

680

'uploader_id': 'beergamesbeer',

},

}, {

'info_dict': {

'id': 'PUOgX5z9xZw',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (grizzle)',

687

'description': 'md5:dc7872fb300e143831327f1bae3af010',

688

'upload_date': '20150721',

689

'uploader': 'Beer Games Beer',

690

'uploader_id': 'beergamesbeer',

},

}, {

'info_dict': {

'id': 'teuwxikvS5k',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (zim)',

697

'description': 'md5:dc7872fb300e143831327f1bae3af010',

698

'upload_date': '20150721',

699

'uploader': 'Beer Games Beer',

700

'uploader_id': 'beergamesbeer',

},

}],

'params': {

'skip_download': True,

},

},

{

# Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)

709

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

710

'info_dict': {

711

'id': 'gVfLd0zydlo',

712

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

},

'playlist_count': 2,

},

{

'url': 'http://vid.plus/FlRa-iH7PGw',

718

'only_matching': True,

719

},

720

{

721

# Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)

722

# Also tests cut-off URL expansion in video description (see

723

# https://github.com/rg3/youtube-dl/issues/1892,

724

# https://github.com/rg3/youtube-dl/issues/8164)

725

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

730

'alt_title': 'Dark Walk',

731

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

732

'upload_date': '20151119',

733

'uploader_id': 'IronSoulElf',

734

'uploader': 'IronSoulElf',

735

'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',

736

},

737

'params': {

738

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)

743

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

744

'only_matching': True,

745

},

746

{

747

# Video with yt:stretch=17:0

748

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

753

'description': 'md5:ee18a25c350637c8faff806845bddee9',

754

'upload_date': '20151107',

755

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

756

'uploader': 'CH GAMER DROID',

757

},

758

'params': {

759

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

764

'only_matching': True,

}

]

def __init__(self, *args, **kwargs):
    """Initialise the extractor and start with an empty player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions keyed by (player URL, signature cache id);
    # populated by _decrypt_signature.
    self._player_cache = dict()

771

772

def report_video_info_webpage_download(self, video_id):
    """Announce that the video info webpage of *video_id* is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)

775

776

def report_information_extraction(self, video_id):
    """Announce that information is being extracted for *video_id*."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)

779

780

def report_unavailable_format(self, video_id, format):
    """Announce that *format* is not available for *video_id*."""
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)

783

784

def report_rtmp_download(self):
    """Announce that the download will go through the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)

787

788

def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of *example_sig*, joined by dots."""
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)

791

792

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that deciphers signatures shaped like *example_sig*.

    The player id and type (file extension) are parsed out of *player_url*.
    If the 'youtube-sigfuncs' cache already holds an index list for this
    player and signature layout, a function applying that list is returned
    without downloading anything. Otherwise the player is downloaded,
    parsed ('js' or 'swf'), and the index list it produces is stored in the
    cache before the parsed function is returned.

    Raises ExtractorError if the player URL cannot be parsed, if the derived
    cache id is not a plain file name, or if the player type is neither
    'js' nor 'swf'.
    """
    id_m = re.match(
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
        player_url)
    if not id_m:
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    # The id is used as a cache file name, so it must not contain any path
    # component. Checked explicitly (not with assert) so that the guard
    # survives running under "python -O".
    if os.path.basename(func_id) != func_id:
        raise ExtractorError('Unsafe signature cache id %r' % func_id)

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        # The cached spec lists, per output position, the input index to copy.
        return lambda s: ''.join(s[i] for i in cache_spec)

    download_note = (
        'Downloading player %s' % player_url
        if self._downloader.params.get('verbose') else
        'Downloading %s player %s' % (player_type, player_id)
    )
    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        # Raised explicitly: an "assert False" would vanish under -O and
        # execution would continue with 'res' unbound.
        raise ExtractorError('Invalid player type %r' % player_type)

    # Run the function on a string whose characters encode their own
    # position, so the result reveals which input index lands where.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res

837

838

def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is applied to a test string built with compat_chr over
    range(len(example_sig)); the ordinals of the result give, for each
    output position, the input index it came from. Runs of indices that
    change by +1 or -1 are rendered as slices of ``s``, everything else as
    single-item lookups, and the pieces are joined with ' + '. The snippet
    is written with to_screen.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # A negative stop would be read as "from the end", so leave the
            # stop out when the run goes down to index 0.
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # Nothing to emit for an empty index list.
        if not idxs:
            return

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        # Seed the loop variable so the final yield below also works when
        # there is a single index and the loop body never runs.
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current run.
                    continue
                # The run ended at prev; emit it and start over.
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev.
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

876

877

def _parse_sig_js(self, jscode):
    """Build a signature-deciphering callable from JS player source *jscode*.

    The name of the signature function is located with a regex, the function
    is extracted with JSInterpreter, and the returned callable passes its
    argument to it wrapped in a one-element list.
    """
    sig_func_name = self._search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
        'Initial JS player signature function name')

    interpreter = JSInterpreter(jscode)
    decipher = interpreter.extract_function(sig_func_name)

    def apply_to_signature(s):
        return decipher([s])

    return apply_to_signature

885

886

def _parse_sig_swf(self, file_contents):
    """Build a signature-deciphering callable from SWF player bytes.

    Extracts the 'decipher' function of the 'SignatureDecipher' class with
    SWFInterpreter; the returned callable passes its argument to it wrapped
    in a one-element list.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def apply_to_signature(s):
        return decipher([s])

    return apply_to_signature

892

893

def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature.

    Signature functions are kept in self._player_cache, keyed by the player
    URL and the layout of *s*. Any failure while obtaining or applying the
    function is re-raised as ExtractorError carrying the traceback text.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Protocol-relative player URLs are fetched over https.
    if player_url.startswith('//'):
        player_url = 'https:' + player_url

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decipher = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)

916

917

def _get_subtitles(self, video_id, webpage):
    """Return a dict mapping language code to a list of subtitle formats.

    The track list is fetched from the timedtext listing endpoint. For each
    language (first track wins) one entry per extension in
    _SUBTITLE_FORMATS is built. A warning is reported and {} returned when
    the listing cannot be downloaded or contains no tracks.
    """
    try:
        track_list = self._download_xml(
            'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
            video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    subtitles = {}
    for track in track_list.findall('track'):
        lang = track.attrib['lang_code']
        if lang in subtitles:
            # Keep the first track listed for a language.
            continue
        subtitles[lang] = [{
            'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse.urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            }),
            'ext': ext,
        } for ext in self._SUBTITLE_FORMATS]

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}

def _get_ytplayer_config(self, video_id, webpage):
    """Return the parsed ytplayer.config JSON found in *webpage*.

    Returns None when no pattern matches; JSON parsing is non-fatal.
    """
    # User data may contain arbitrary character sequences that may affect
    # JSON extraction with regex, e.g. when '};' is contained the second
    # regex won't capture the whole JSON. Yet working around by trying more
    # concrete regex first keeping in mind proper quoted string handling
    # to be implemented in future that will replace this workaround (see
    # https://github.com/rg3/youtube-dl/issues/7468,
    # https://github.com/rg3/youtube-dl/pull/7599)
    config_patterns = (
        r';ytplayer\.config\s*=\s*({.+?});ytplayer',
        r';ytplayer\.config\s*=\s*({.+?});',
    )
    raw_config = self._search_regex(
        config_patterns, webpage, 'ytplayer.config', default=None)
    if not raw_config:
        return None
    return self._parse_json(
        uppercase_escape(raw_config), video_id, fatal=False)

966

967

def _get_automatic_captions(self, video_id, webpage):
    """Return a dict mapping language code to automatic caption formats.

    The webpage is passed in because the captions URL is read from the
    player config embedded in it. Two sources are supported: the 'ttsurl'
    player argument, or the 'caption_tracks' /
    'caption_translation_languages' arguments. On any missing key or
    download error a warning is reported and {} is returned.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    player_config = self._get_ytplayer_config(video_id, webpage)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not player_config:
        self._downloader.report_warning(err_msg)
        return {}
    try:
        args = player_config['args']
        tts_url = args.get('ttsurl')
        if tts_url:
            timestamp = args['timestamp']
            # We get the available subtitles
            listing_query = compat_urllib_parse.urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            listing = self._download_xml(
                tts_url + '&' + listing_query, video_id)
            source_track = listing.find('track')
            if source_track is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = source_track.attrib['lang_code']
            caption_kind = source_track.attrib.get('kind', '')

            captions = {}
            for target in listing.findall('target'):
                target_lang = target.attrib['lang_code']
                captions[target_lang] = [{
                    'url': tts_url + '&' + compat_urllib_parse.urlencode({
                        'lang': original_lang,
                        'tlang': target_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    }),
                    'ext': ext,
                } for ext in self._SUBTITLE_FORMATS]
            return captions

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        caption_tracks = args['caption_tracks']
        translation_languages = args['caption_translation_languages']
        # Only the first listed track is used as the base URL.
        first_track = caption_tracks.split(',')[0]
        base_url = compat_parse_qs(first_track)['u'][0]
        parsed_base_url = compat_urlparse.urlparse(base_url)
        base_query = compat_parse_qs(parsed_base_url.query)

        captions = {}
        for lang_entry in translation_languages.split(','):
            lang_query = compat_parse_qs(compat_urllib_parse_unquote_plus(lang_entry))
            target_lang = lang_query.get('lc', [None])[0]
            if not target_lang:
                continue
            base_query['tlang'] = [target_lang]
            formats = []
            for ext in self._SUBTITLE_FORMATS:
                base_query['fmt'] = [ext]
                new_query = compat_urllib_parse.urlencode(base_query, True)
                formats.append({
                    'url': compat_urlparse.urlunparse(
                        parsed_base_url._replace(query=new_query)),
                    'ext': ext,
                })
            captions[target_lang] = formats
        return captions
    # An extractor error can be raise by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}

@classmethod
def extract_id(cls, url):
    """Return the video id captured by group 2 of ``cls._VALID_URL``.

    The pattern is matched in verbose mode against the start of *url*.
    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)

1056

1057

def _extract_from_m3u8(self, manifest_url, video_id):

1058

url_map = {}

1059

1060

def _get_urls(_manifest):

1061

lines = _manifest.split('\n')

1062

urls = filter(lambda l: l and not l.startswith('#'),

1063

lines)

1064

return urls

1065

manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')

1066

formats_urls = _get_urls(manifest)

1067

for format_url in formats_urls:

1068

itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')

1069

url_map[itag] = format_url

1070

return url_map

1071

1072

def _extract_annotations(self, video_id):

1073

url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id

1074

return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')

1075

1076

def _real_extract(self, url):
    """Extract metadata and the list of formats for a single video URL.

    Steps, in order: resolve the video id, download the watch page,
    obtain ``video_info`` (from the embed page plus get_video_info for
    age-gated videos, otherwise from the page's player config and/or
    get_video_info), scrape metadata from the watch page, build the
    format list (rtmp, url_encoded_fmt_stream_map/adaptive_fmts or HLS),
    merge in DASH manifest formats and return the info dict.

    Returns a playlist result instead when the video exposes multiple
    feeds and neither ``force_singlefeed`` was smuggled into the URL nor
    the ``noplaylist`` option is set.

    Raises ExtractorError when the video info carries no token, for
    "rental" videos, when the uploader is missing, for rtmpe streams and
    when no usable stream information is found.
    """
    url, smuggled_data = unsmuggle_url(url, {})

    proto = (
        'http' if self._downloader.params.get('prefer_insecure', False)
        else 'https')

    # Start/end offsets may be given in the URL fragment or in the query;
    # the fragment is inspected first and the first value found wins.
    start_time = None
    end_time = None
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        if start_time is None and 't' in query:
            start_time = parse_duration(query['t'][0])
        if start_time is None and 'start' in query:
            start_time = parse_duration(query['start'][0])
        if end_time is None and 'end' in query:
            end_time = parse_duration(query['end'][0])

    # Extract original video URL from URL with redirection, like age verification, using next_url parameter
    mobj = re.search(self._NEXT_URL_RE, url)
    if mobj:
        url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
    video_id = self.extract_id(url)

    # Get video webpage
    url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
    video_webpage = self._download_webpage(url, video_id)

    # Attempt to extract SWF player URL
    mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
    if mobj is not None:
        # Drop the backslashes escaping the URL inside the page's JSON
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
    else:
        player_url = None

    dash_mpds = []

    def add_dash_mpd(video_info):
        # Remember each distinct DASH manifest URL, preserving discovery order
        dash_mpd = video_info.get('dashmpd')
        if dash_mpd and dash_mpd[0] not in dash_mpds:
            dash_mpds.append(dash_mpd[0])

    # Get video info
    embed_webpage = None
    is_live = None
    if re.search(r'player-age-gate-content">', video_webpage) is not None:
        age_gate = True
        # We simulate the access to the video from www.youtube.com/v/{video_id}
        # this can be viewed without login into Youtube
        url = proto + '://www.youtube.com/embed/%s' % video_id
        embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
        data = compat_urllib_parse.urlencode({
            'video_id': video_id,
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            'sts': self._search_regex(
                r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
        })
        video_info_url = proto + '://www.youtube.com/get_video_info?' + data
        video_info_webpage = self._download_webpage(
            video_info_url, video_id,
            note='Refetching age-gated info webpage',
            errnote='unable to download video info webpage')
        video_info = compat_parse_qs(video_info_webpage)
        add_dash_mpd(video_info)
    else:
        age_gate = False
        video_info = None
        # Try looking directly into the video webpage
        ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
        if ytplayer_config:
            args = ytplayer_config['args']
            if args.get('url_encoded_fmt_stream_map'):
                # Convert to the same format returned by compat_parse_qs
                video_info = dict((k, [v]) for k, v in args.items())
                add_dash_mpd(video_info)
            if args.get('livestream') == '1' or args.get('live_playback') == 1:
                is_live = True
        if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
            # We also try looking in get_video_info since it may contain different dashmpd
            # URL that points to a DASH manifest with possibly different itag set (some itags
            # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
            # manifest pointed by get_video_info's dashmpd).
            # The general idea is to take a union of itags of both DASH manifests (for example
            # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
            self.report_video_info_webpage_download(video_id)
            for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = (
                    '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (proto, video_id, el_type))
                video_info_webpage = self._download_webpage(
                    video_info_url,
                    video_id, note=False,
                    errnote='unable to download video info webpage')
                get_video_info = compat_parse_qs(video_info_webpage)
                if get_video_info.get('use_cipher_signature') != ['True']:
                    add_dash_mpd(get_video_info)
                if not video_info:
                    video_info = get_video_info
                if 'token' in get_video_info:
                    # Different get_video_info requests may report different results, e.g.
                    # some may report video unavailability, but some may serve it without
                    # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
                    # the original webpage as well as el=info and el=embedded get_video_info
                    # requests report video unavailability due to geo restriction while
                    # el=detailpage succeeds and returns valid data). This is probably
                    # due to YouTube measures against IP ranges of hosting providers.
                    # Working around by preferring the first succeeded video_info containing
                    # the token if no such video_info yet was found.
                    if 'token' not in video_info:
                        video_info = get_video_info
                    break
    if 'token' not in video_info:
        if 'reason' in video_info:
            # video_info values are lists, so this is an exact-match membership test
            if 'The uploader has not made this video available in your country.' in video_info['reason']:
                regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
                if regions_allowed:
                    raise ExtractorError('YouTube said: This video is available in %s only' % (
                        ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
                        expected=True)
            raise ExtractorError(
                'YouTube said: %s' % video_info['reason'][0],
                expected=True, video_id=video_id)
        else:
            raise ExtractorError(
                '"token" parameter not in video info for unknown reason',
                video_id=video_id)

    # title
    if 'title' in video_info:
        video_title = video_info['title'][0]
    else:
        self._downloader.report_warning('Unable to extract video title')
        video_title = '_'

    # description
    video_description = get_element_by_id("eow-description", video_webpage)
    if video_description:
        # Replace links whose visible text was shortened with "..." by
        # the full target taken from their title/href attribute
        video_description = re.sub(r'''(?x)
            <a\s+
                (?:[a-zA-Z-]+="[^"]+"\s+)*?
                (?:title|href)="([^"]+)"\s+
                (?:[a-zA-Z-]+="[^"]+"\s+)*?
                class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
            [^<]+\.{3}\s*
            </a>
        ''', r'\1', video_description)
        video_description = clean_html(video_description)
    else:
        fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
        if fd_mobj:
            video_description = unescapeHTML(fd_mobj.group(1))
        else:
            video_description = ''

    if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
        if not self._downloader.params.get('noplaylist'):
            entries = []
            feed_ids = []
            multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
            for feed in multifeed_metadata_list.split(','):
                # The list is split on comma (,) first and each feed is
                # unquoted only afterwards, since textual fields may
                # contain (quoted) commas as well (see
                # https://github.com/rg3/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': '%s (%s)' % (video_title, feed_data['title'][0]),
                })
                feed_ids.append(feed_data['id'][0])
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(entries, video_id, video_title, video_description)
        self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    if 'view_count' in video_info:
        view_count = int(video_info['view_count'][0])
    else:
        view_count = None

    # Check for "rental" videos
    if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
        raise ExtractorError('"rental" videos not supported')

    # Start extracting information
    self.report_information_extraction(video_id)

    # uploader
    if 'author' not in video_info:
        raise ExtractorError('Unable to extract uploader name')
    video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])

    # uploader_id
    video_uploader_id = None
    mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
    if mobj is not None:
        video_uploader_id = mobj.group(1)
    else:
        self._downloader.report_warning('unable to extract uploader nickname')

    # thumbnail image
    # We try first to get a high quality image:
    m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                        video_webpage, re.DOTALL)
    if m_thumb is not None:
        video_thumbnail = m_thumb.group(1)
    elif 'thumbnail_url' not in video_info:
        self._downloader.report_warning('unable to extract video thumbnail')
        video_thumbnail = None
    else:   # don't panic if we can't find it
        video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])

    # upload date
    upload_date = self._html_search_meta(
        'datePublished', video_webpage, 'upload date', default=None)
    if not upload_date:
        upload_date = self._search_regex(
            [r'(?s)id="eow-date.*?>(.*?)</span>',
             r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
            video_webpage, 'upload date', default=None)
        if upload_date:
            # Normalise the date string that was just extracted. (This used
            # to read mobj.group(1), but mobj here is the stale uploader-id
            # match from above - or None - not the upload date.)
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', upload_date).split())
    upload_date = unified_strdate(upload_date)

    m_music = re.search(
        r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:$.+?$)?</li',
        video_webpage)
    if m_music:
        video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
        video_creator = clean_html(m_music.group('creator'))
    else:
        video_alt_title = video_creator = None

    m_cat_container = self._search_regex(
        r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
        video_webpage, 'categories', default=None)
    if m_cat_container:
        category = self._html_search_regex(
            r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
            default=None)
        video_categories = None if category is None else [category]
    else:
        video_categories = None

    video_tags = [
        unescapeHTML(m.group('content'))
        for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]

    def _extract_count(count_name):
        # Read the number shown on the "<count_name>" button of the watch page
        return str_to_int(self._search_regex(
            r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
            % re.escape(count_name),
            video_webpage, count_name, default=None))

    like_count = _extract_count('like')
    dislike_count = _extract_count('dislike')

    # subtitles
    video_subtitles = self.extract_subtitles(video_id, video_webpage)
    automatic_captions = self.extract_automatic_captions(video_id, video_webpage)

    if 'length_seconds' not in video_info:
        self._downloader.report_warning('unable to extract video duration')
        video_duration = None
    else:
        video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))

    # annotations
    video_annotations = None
    if self._downloader.params.get('writeannotations', False):
        video_annotations = self._extract_annotations(video_id)

    def _map_to_format_list(urlmap):
        # Turn an {itag: url} mapping into a list of format dicts, enriched
        # with the static per-itag data from self._formats when available
        formats = []
        for itag, video_real_url in urlmap.items():
            dct = {
                'format_id': itag,
                'url': video_real_url,
                'player_url': player_url,
            }
            if itag in self._formats:
                dct.update(self._formats[itag])
            formats.append(dct)
        return formats

    if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
        self.report_rtmp_download()
        formats = [{
            'format_id': '_rtmp',
            'protocol': 'rtmp',
            'url': video_info['conn'][0],
            'player_url': player_url,
        }]
    elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
        encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
        if 'rtmpe%3Dyes' in encoded_url_map:
            raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
        formats = []
        for url_data_str in encoded_url_map.split(','):
            url_data = compat_parse_qs(url_data_str)
            if 'itag' not in url_data or 'url' not in url_data:
                continue
            format_id = url_data['itag'][0]
            url = url_data['url'][0]

            if 'sig' in url_data:
                url += '&signature=' + url_data['sig'][0]
            elif 's' in url_data:
                encrypted_sig = url_data['s'][0]
                ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'

                jsplayer_url_json = self._search_regex(
                    ASSETS_RE,
                    embed_webpage if age_gate else video_webpage,
                    'JS player URL (1)', default=None)
                if not jsplayer_url_json and not age_gate:
                    # We need the embed website after all
                    if embed_webpage is None:
                        embed_url = proto + '://www.youtube.com/embed/%s' % video_id
                        embed_webpage = self._download_webpage(
                            embed_url, video_id, 'Downloading embed webpage')
                    jsplayer_url_json = self._search_regex(
                        ASSETS_RE, embed_webpage, 'JS player URL')

                # jsplayer_url_json is still None when the first lookup
                # (default=None) found nothing and the retry above was
                # skipped because age_gate is set. json.loads(None) would
                # raise TypeError, so go straight to the fallback below.
                player_url = json.loads(jsplayer_url_json) if jsplayer_url_json else None
                if player_url is None:
                    player_url_json = self._search_regex(
                        r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
                        video_webpage, 'age gate player URL')
                    player_url = json.loads(player_url_json)

                if self._downloader.params.get('verbose'):
                    if player_url is None:
                        player_version = 'unknown'
                        player_desc = 'unknown'
                    else:
                        if player_url.endswith('swf'):
                            player_version = self._search_regex(
                                r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
                                'flash player', fatal=False)
                            player_desc = 'flash player %s' % player_version
                        else:
                            player_version = self._search_regex(
                                [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
                                player_url,
                                'html5 player', fatal=False)
                            player_desc = 'html5 player %s' % player_version

                    parts_sizes = self._signature_cache_id(encrypted_sig)
                    self.to_screen('{%s} signature length %s, %s' %
                                   (format_id, parts_sizes, player_desc))

                signature = self._decrypt_signature(
                    encrypted_sig, video_id, player_url, age_gate)
                url += '&signature=' + signature
            if 'ratebypass' not in url:
                url += '&ratebypass=yes'

            dct = {
                'format_id': format_id,
                'url': url,
                'player_url': player_url,
            }
            if format_id in self._formats:
                dct.update(self._formats[format_id])

            # Some itags are not included in DASH manifest thus corresponding formats will
            # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
            # Trying to extract metadata from url_encoded_fmt_stream_map entry.
            mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
            width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)

            more_fields = {
                'filesize': int_or_none(url_data.get('clen', [None])[0]),
                'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
                'width': width,
                'height': height,
                'fps': int_or_none(url_data.get('fps', [None])[0]),
                'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
            }
            # Only truthy values override what self._formats provided
            for key, value in more_fields.items():
                if value:
                    dct[key] = value
            type_ = url_data.get('type', [None])[0]
            if type_:
                type_split = type_.split(';')
                kind_ext = type_split[0].split('/')
                if len(kind_ext) == 2:
                    kind, _ = kind_ext
                    dct['ext'] = mimetype2ext(type_split[0])
                    if kind in ('audio', 'video'):
                        codecs = None
                        for mobj in re.finditer(
                                r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
                            if mobj.group('key') == 'codecs':
                                codecs = mobj.group('val')
                                break
                        if codecs:
                            codecs = codecs.split(',')
                            if len(codecs) == 2:
                                # Two codecs: the first is taken as video, the second as audio
                                acodec, vcodec = codecs[1], codecs[0]
                            else:
                                acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
                            dct.update({
                                'acodec': acodec,
                                'vcodec': vcodec,
                            })
            formats.append(dct)
    elif video_info.get('hlsvp'):
        manifest_url = video_info['hlsvp'][0]
        url_map = self._extract_from_m3u8(manifest_url, video_id)
        formats = _map_to_format_list(url_map)
        # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
        for a_format in formats:
            a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
    else:
        unavailable_message = self._html_search_regex(
            r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
            video_webpage, 'unavailable message', default=None)
        if unavailable_message:
            raise ExtractorError(unavailable_message, expected=True)
        raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

    # Look for the DASH manifest
    if self._downloader.params.get('youtube_include_dash_manifest', True):
        dash_mpd_fatal = True
        for mpd_url in dash_mpds:
            dash_formats = {}
            try:
                def decrypt_sig(mobj):
                    s = mobj.group(1)
                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
                    return '/signature/%s' % dec_s

                # Replace an encrypted "/s/<sig>" path component by its decrypted form
                mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)

                for df in self._extract_mpd_formats(
                        mpd_url, video_id, fatal=dash_mpd_fatal,
                        formats_dict=self._formats):
                    # Do not overwrite DASH format found in some previous DASH manifest
                    if df['format_id'] not in dash_formats:
                        dash_formats[df['format_id']] = df
                    # Additional DASH manifests may end up in HTTP Error 403 therefore
                    # allow them to fail without bug report message if we already have
                    # some DASH manifest succeeded. This is temporary workaround to reduce
                    # burst of bug reports until we figure out the reason and whether it
                    # can be fixed at all.
                    dash_mpd_fatal = False
            except (ExtractorError, KeyError) as e:
                self.report_warning(
                    'Skipping DASH manifest: %r' % e, video_id)
            if dash_formats:
                # Remove the formats we found through non-DASH, they
                # contain less info and it can be wrong, because we use
                # fixed values (for example the resolution). See
                # https://github.com/rg3/youtube-dl/issues/5774 for an
                # example.
                formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
                formats.extend(dash_formats.values())

    # Check for malformed aspect ratio
    stretched_m = re.search(
        r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
        video_webpage)
    if stretched_m:
        w = float(stretched_m.group('w'))
        h = float(stretched_m.group('h'))
        # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
        # We will only process correct ratios.
        if w > 0 and h > 0:
            ratio = w / h
            for f in formats:
                if f.get('vcodec') != 'none':
                    f['stretched_ratio'] = ratio

    self._sort_formats(formats)

    return {
        'id': video_id,
        'uploader': video_uploader,
        'uploader_id': video_uploader_id,
        'upload_date': upload_date,
        'creator': video_creator,
        'title': video_title,
        'alt_title': video_alt_title,
        'thumbnail': video_thumbnail,
        'description': video_description,
        'categories': video_categories,
        'tags': video_tags,
        'subtitles': video_subtitles,
        'automatic_captions': automatic_captions,
        'duration': video_duration,
        'age_limit': 18 if age_gate else 0,
        'annotations': video_annotations,
        'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
        'view_count': view_count,
        'like_count': like_count,
        'dislike_count': dislike_count,
        'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
        'formats': formats,
        'is_live': is_live,
        'start_time': start_time,
        'end_time': end_time,
    }

class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists, given as a URL or as a bare playlist id.

    Ids starting with ``RD`` or ``UL`` are handled as mixes (see
    ``_extract_mix``); everything else is extracted from the regular
    playlist page (see ``_extract_playlist``).
    """
    IE_DESC = 'YouTube.com playlists'
    # Group 1: playlist id found in a youtube.com URL.
    # Group 2: bare playlist id (a known prefix is mandatory in that case).
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           # Path segments under which the playlist id is passed as
                           # a query parameter; without them none of the URLs in
                           # _TESTS (nor watch?v=...&list=... URLs) could match
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?[&;])*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
    }, {
        'note': 'embedded',
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        # was misspelled 'playlist_mincout'
        'playlist_mincount': 21,
    }]

    def _real_initialize(self):
        self._login()

    def _extract_mix(self, playlist_id):
        """Extract a mix, i.e. a playlist generated from a single video.

        Returns a playlist result built from the video ids linked on the
        seed video's watch page.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        # Try the known title containers in turn; the first non-empty one wins
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)
        ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
            webpage))
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Extract a regular playlist from its playlist page.

        Raises ExtractorError (expected) when the page carries an alert
        saying the playlist does not exist / is private, or that the
        parameters are invalid. Other alerts are reported as warnings.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
                raise ExtractorError(
                    'The playlist doesn\'t exist or is private, use --username or '
                    '--netrc to access it.',
                    expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                # Harmless language-selection banner
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title')

        return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)

    def _check_download_just_video(self, url, playlist_id):
        """Decide whether only the video named in *url* should be downloaded.

        Returns a url_result for the single video when *url* has a ``v``
        query parameter and the ``noplaylist`` option is set; returns
        None in every other case.
        """
        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

    def _real_extract(self, url):
        """Dispatch *url* to single-video, mix or regular playlist extraction."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        return self._extract_playlist(playlist_id)

1758

1759

1760

class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for the videos of a YouTube channel.

    When the channel's ``UC...`` id can be read from the channel page,
    extraction is delegated to the playlist extractor using the matching
    ``UU...`` playlist; otherwise the channel's video pages are used.
    """
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubePlaylistsIE handles; otherwise defer to the base check."""
        if YoutubePlaylistsIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return the channel's videos as a playlist (or a redirect to one)."""
        channel_id = self._match_id(url)
        videos_url = self._TEMPLATE_URL % channel_id

        # Listing a channel page by page is capped at 35 pages of 30 items,
        # i.e. 1050 videos in total (see #5778). To get around that, the
        # channel is extracted as a playlist whenever its uploads playlist
        # can be determined; page-wise extraction is only the fallback.
        probe_page = self._download_webpage(
            videos_url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = False
        if probe_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', probe_page, 'channel id', default=None)
            if not channel_playlist_id:
                channel_playlist_id = self._search_regex(
                    r'data-(?:channel-external-|yt)id="([^"]+)"',
                    probe_page, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to get the uploads playlist id
            uploads_id = 'UU' + channel_playlist_id[2:]
            playlist_url = compat_urlparse.urljoin(
                videos_url, '/playlist?list=%s' % uploads_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        first_page = self._download_webpage(videos_url, channel_id, 'Downloading page #1')
        autogenerated_mobj = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', first_page)

        if autogenerated_mobj is None:
            return self.playlist_result(self._entries(first_page, channel_id), channel_id)

        # Auto-generated channel: all videos sit on this single page and
        # the ajax pages are of no use because they come back empty.
        entries = []
        for video_id, video_title in self.extract_videos_from_page(first_page):
            entries.append(self.url_result(
                video_id, 'Youtube', video_id=video_id,
                video_title=video_title))
        return self.playlist_result(entries, channel_id)

1832

1833

1834

class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for the videos of a YouTube user (URL or "ytuser:" keyword)."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return True only if no other extractor of this module accepts url."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: the regex is too permissive and it would match anyway.
        for name, klass in globals().items():
            if not name.endswith('IE') or klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

1860

1861

1862

class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the "playlists" tab of a YouTube user or channel."""

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [
        {
            'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
            'playlist_mincount': 4,
            'info_dict': {
                'id': 'ThirstForScience',
                'title': 'Thirst for Science',
            },
        },
        {
            # with "Load more" button
            'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
            'playlist_mincount': 70,
            'info_dict': {
                'id': 'igorkle1',
                'title': 'Игорь Клейнер',
            },
        },
        {
            'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
            'playlist_mincount': 17,
            'info_dict': {
                'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
                'title': 'Chem Player',
            },
        },
    ]

class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    """Extractor for YouTube search queries (the "ytsearch" search key)."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Additional query string parameters merged into every results request
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another until n videos have
        been collected or a page yields no videos. At most n entries are
        returned as a playlist result titled with the query.

        Raises ExtractorError (expected) when the results page contains a
        search message instead of results.
        """
        videos = []

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query.encode('utf-8'),
                'page': pagenum,
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as n results are available (>=, not >): fetching
            # another page once exactly n videos have been collected would
            # be a wasted request whose results are discarded below.
            if not new_videos or len(videos) >= n:
                break

        # Only slice when there is a surplus: n may be float('inf')
        # (_MAX_RESULTS), which is not a valid slice bound.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)

1935

1936

1937

class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that adds a sort-by-upload-date query argument."""

    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the results query by the parent's _get_n_results
    _EXTRA_QUERY_ARGS = {
        'search_sort': 'video_date_uploaded',
    }

1942

1943

1944

class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for YouTube search result page URLs (/results?search_query=...)."""

    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the result page and return its items as a playlist.

        The playlist is titled with the decoded search query; every entry
        is a plain 'url' result pointing at the linked item.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)

        webpage = self._download_webpage(url, query)
        # Only the first <ol class="item-section"> block is inspected
        result_code = self._search_regex(
            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')

        entries = []
        # Each result is identified by its <h3 class="...yt-lockup-title..."> heading
        for heading_html in re.findall(
                r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code):
            # The title is optional, the link is mandatory
            item_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], heading_html, 'item title', fatal=False)
            item_href = self._html_search_regex(
                r'(?s)href="([^"]+)"', heading_html, 'item URL')
            entries.append({
                '_type': 'url',
                'url': compat_urlparse.urljoin(
                    'https://www.youtube.com/', item_href),
                'title': item_title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }

class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for YouTube shows; delegates to the show's playlists page."""

    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Run the parent extraction on the show's '/playlists' URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)

2007

2008

2009

class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass' _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed and return them as a playlist.

        The feed page is downloaded first; further portions are fetched by
        following the "load more" link until it disappears or a portion
        brings no video id that has not been seen yet.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos.
            # This must be a real list: on Python 3 a bare filter() object is
            # always truthy, so the emptiness check below would never trigger
            # and the loop would not terminate.
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)

2056

2057

2058

class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "watch later" list (playlist id 'WL')."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/playlist?list=WL',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the single-video result if there is one, else the 'WL' playlist."""
        single_video = self._check_download_just_video(url, 'WL')
        return single_video if single_video else self._extract_playlist('WL')

2076

2077

2078

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the favourites list; hands over to the playlist extractor."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the favourites playlist id on the my_favorites page and delegate."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The id is taken from the first 'list=' value found in the page
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')

2088

2089

2090

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for /feed/recommended (also ":ytrec" / ":ytrecommended")."""

    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'

2095

2096

2097

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for /feed/subscriptions (also ":ytsubs" / ":ytsubscriptions")."""

    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'

2102

2103

2104

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for /feed/history (also reachable as ":ythistory")."""

    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string: '\.' is not a valid escape sequence in a regular string
    # literal and triggers an invalid-escape warning on recent Pythons.
    # The resulting pattern is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'

2109

2110

2111

class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch/attribution URLs that carry no video id and explains why.

    Extraction always fails with an expected error hinting that the URL
    was probably not quoted in the shell.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'http://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)

class YoutubeTruncatedIDIE(InfoExtractor):
    """Matches watch URLs whose video id is 1 to 10 characters long and rejects them."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)