jfr.im git - yt-dlp.git/blame_incremental - youtube

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5
	6	import itertools
	7	import json
	8	import os.path
	9	import random
	10	import re
	11	import time
	12	import traceback
	13
	14	from .common import InfoExtractor, SearchInfoExtractor
	15	from ..jsinterp import JSInterpreter
	16	from ..swfinterp import SWFInterpreter
	17	from ..compat import (
	18	compat_chr,
	19	compat_HTTPError,
	20	compat_kwargs,
	21	compat_parse_qs,
	22	compat_urllib_parse_unquote,
	23	compat_urllib_parse_unquote_plus,
	24	compat_urllib_parse_urlencode,
	25	compat_urllib_parse_urlparse,
	26	compat_urlparse,
	27	compat_str,
	28	)
	29	from ..utils import (
	30	clean_html,
	31	dict_get,
	32	error_to_compat_str,
	33	ExtractorError,
	34	float_or_none,
	35	get_element_by_attribute,
	36	get_element_by_id,
	37	int_or_none,
	38	mimetype2ext,
	39	orderedSet,
	40	parse_codecs,
	41	parse_duration,
	42	qualities,
	43	remove_quotes,
	44	remove_start,
	45	smuggle_url,
	46	str_or_none,
	47	str_to_int,
	48	try_get,
	49	unescapeHTML,
	50	unified_strdate,
	51	unsmuggle_url,
	52	uppercase_escape,
	53	url_or_none,
	54	urlencode_postdata,
	55	)
	56
	57
	58	class YoutubeBaseInfoExtractor(InfoExtractor):
	59	"""Provide base functions for Youtube extractors"""
	60	_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
	61	_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
	62
	63	_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
	64	_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
	65	_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
	66
	67	_NETRC_MACHINE = 'youtube'
	68	# If True it will raise an error if no login info is provided
	69	_LOGIN_REQUIRED = False
	70
	71	_PLAYLIST_ID_RE = r'(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|OLAK5uy_)[0-9A-Za-z-_]{10,}'
	72
	73	def _set_language(self):
	74	self._set_cookie(
	75	'.youtube.com', 'PREF', 'f1=50000000&hl=en',
	76	# YouTube sets the expire time to about two months
	77	expire_time=time.time() + 2 * 30 * 24 * 3600)
	78
	79	def _ids_to_results(self, ids):
	80	return [
	81	self.url_result(vid_id, 'Youtube', video_id=vid_id)
	82	for vid_id in ids]
	83
	84	def _login(self):
	85	"""
	86	Attempt to log in to YouTube.
	87	True is returned if successful or skipped.
	88	False is returned if login failed.
	89
	90	If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
	91	"""
	92	username, password = self._get_login_info()
	93	# No authentication to be performed
	94	if username is None:
	95	if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
	96	raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
	97	return True
	98
	99	login_page = self._download_webpage(
	100	self._LOGIN_URL, None,
	101	note='Downloading login page',
	102	errnote='unable to fetch login page', fatal=False)
	103	if login_page is False:
	104	return
	105
	106	login_form = self._hidden_inputs(login_page)
	107
	108	def req(url, f_req, note, errnote):
	109	data = login_form.copy()
	110	data.update({
	111	'pstMsg': 1,
	112	'checkConnection': 'youtube',
	113	'checkedDomains': 'youtube',
	114	'hl': 'en',
	115	'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
	116	'f.req': json.dumps(f_req),
	117	'flowName': 'GlifWebSignIn',
	118	'flowEntry': 'ServiceLogin',
	119	})
	120	return self._download_json(
	121	url, None, note=note, errnote=errnote,
	122	transform_source=lambda s: re.sub(r'^[^[]*', '', s),
	123	fatal=False,
	124	data=urlencode_postdata(data), headers={
	125	'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
	126	'Google-Accounts-XSRF': 1,
	127	})
	128
	129	def warn(message):
	130	self._downloader.report_warning(message)
	131
	132	lookup_req = [
	133	username,
	134	None, [], None, 'US', None, None, 2, False, True,
	135	[
	136	None, None,
	137	[2, 1, None, 1,
	138	'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
	139	None, [], 4],
	140	1, [None, None, []], None, None, None, True
	141	],
	142	username,
	143	]
	144
	145	lookup_results = req(
	146	self._LOOKUP_URL, lookup_req,
	147	'Looking up account info', 'Unable to look up account info')
	148
	149	if lookup_results is False:
	150	return False
	151
	152	user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
	153	if not user_hash:
	154	warn('Unable to extract user hash')
	155	return False
	156
	157	challenge_req = [
	158	user_hash,
	159	None, 1, None, [1, None, None, None, [password, None, True]],
	160	[
	161	None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
	162	1, [None, None, []], None, None, None, True
	163	]]
	164
	165	challenge_results = req(
	166	self._CHALLENGE_URL, challenge_req,
	167	'Logging in', 'Unable to log in')
	168
	169	if challenge_results is False:
	170	return
	171
	172	login_res = try_get(challenge_results, lambda x: x[0][5], list)
	173	if login_res:
	174	login_msg = try_get(login_res, lambda x: x[5], compat_str)
	175	warn(
	176	'Unable to login: %s' % 'Invalid password'
	177	if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
	178	return False
	179
	180	res = try_get(challenge_results, lambda x: x[0][-1], list)
	181	if not res:
	182	warn('Unable to extract result entry')
	183	return False
	184
	185	login_challenge = try_get(res, lambda x: x[0][0], list)
	186	if login_challenge:
	187	challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
	188	if challenge_str == 'TWO_STEP_VERIFICATION':
	189	# SEND_SUCCESS - TFA code has been successfully sent to phone
	190	# QUOTA_EXCEEDED - reached the limit of TFA codes
	191	status = try_get(login_challenge, lambda x: x[5], compat_str)
	192	if status == 'QUOTA_EXCEEDED':
	193	warn('Exceeded the limit of TFA codes, try later')
	194	return False
	195
	196	tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
	197	if not tl:
	198	warn('Unable to extract TL')
	199	return False
	200
	201	tfa_code = self._get_tfa_info('2-step verification code')
	202
	203	if not tfa_code:
	204	warn(
	205	'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
	206	'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
	207	return False
	208
	209	tfa_code = remove_start(tfa_code, 'G-')
	210
	211	tfa_req = [
	212	user_hash, None, 2, None,
	213	[
	214	9, None, None, None, None, None, None, None,
	215	[None, tfa_code, True, 2]
	216	]]
	217
	218	tfa_results = req(
	219	self._TFA_URL.format(tl), tfa_req,
	220	'Submitting TFA code', 'Unable to submit TFA code')
	221
	222	if tfa_results is False:
	223	return False
	224
	225	tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
	226	if tfa_res:
	227	tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
	228	warn(
	229	'Unable to finish TFA: %s' % 'Invalid TFA code'
	230	if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
	231	return False
	232
	233	check_cookie_url = try_get(
	234	tfa_results, lambda x: x[0][-1][2], compat_str)
	235	else:
	236	CHALLENGES = {
	237	'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
	238	'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
	239	'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
	240	}
	241	challenge = CHALLENGES.get(
	242	challenge_str,
	243	'%s returned error %s.' % (self.IE_NAME, challenge_str))
	244	warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
	245	return False
	246	else:
	247	check_cookie_url = try_get(res, lambda x: x[2], compat_str)
	248
	249	if not check_cookie_url:
	250	warn('Unable to extract CheckCookie URL')
	251	return False
	252
	253	check_cookie_results = self._download_webpage(
	254	check_cookie_url, None, 'Checking cookie', fatal=False)
	255
	256	if check_cookie_results is False:
	257	return False
	258
	259	if 'https://myaccount.google.com/' not in check_cookie_results:
	260	warn('Unable to log in')
	261	return False
	262
	263	return True
	264
	265	def _download_webpage_handle(self, args, *kwargs):
	266	query = kwargs.get('query', {}).copy()
	267	query['disable_polymer'] = 'true'
	268	kwargs['query'] = query
	269	return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
	270	args, *compat_kwargs(kwargs))
	271
	272	def _real_initialize(self):
	273	if self._downloader is None:
	274	return
	275	self._set_language()
	276	if not self._login():
	277	return
	278
	279
	280	class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
	281	# Extract entries from page with "Load more" button
	282	def _entries(self, page, playlist_id):
	283	more_widget_html = content_html = page
	284	for page_num in itertools.count(1):
	285	for entry in self._process_page(content_html):
	286	yield entry
	287
	288	mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
	289	if not mobj:
	290	break
	291
	292	count = 0
	293	retries = 3
	294	while count <= retries:
	295	try:
	296	# Downloading page may result in intermittent 5xx HTTP error
	297	# that is usually worked around with a retry
	298	more = self._download_json(
	299	'https://youtube.com/%s' % mobj.group('more'), playlist_id,
	300	'Downloading page #%s%s'
	301	% (page_num, ' (retry #%d)' % count if count else ''),
	302	transform_source=uppercase_escape)
	303	break
	304	except ExtractorError as e:
	305	if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
	306	count += 1
	307	if count <= retries:
	308	continue
	309	raise
	310
	311	content_html = more['content_html']
	312	if not content_html.strip():
	313	# Some webpages show a "Load more" button but they don't
	314	# have more videos
	315	break
	316	more_widget_html = more['load_more_widget_html']
	317
	318
	319	class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
	320	def _process_page(self, content):
	321	for video_id, video_title in self.extract_videos_from_page(content):
	322	yield self.url_result(video_id, 'Youtube', video_id, video_title)
	323
	324	def extract_videos_from_page(self, page):
	325	ids_in_page = []
	326	titles_in_page = []
	327	for mobj in re.finditer(self._VIDEO_RE, page):
	328	# The link with index 0 is not the first video of the playlist (not sure if still actual)
	329	if 'index' in mobj.groupdict() and mobj.group('id') == '0':
	330	continue
	331	video_id = mobj.group('id')
	332	video_title = unescapeHTML(mobj.group('title'))
	333	if video_title:
	334	video_title = video_title.strip()
	335	try:
	336	idx = ids_in_page.index(video_id)
	337	if video_title and not titles_in_page[idx]:
	338	titles_in_page[idx] = video_title
	339	except ValueError:
	340	ids_in_page.append(video_id)
	341	titles_in_page.append(video_title)
	342	return zip(ids_in_page, titles_in_page)
	343
	344
	345	class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
	346	def _process_page(self, content):
	347	for playlist_id in orderedSet(re.findall(
	348	r'<h3[^>]+class="[^"]yt-lockup-title[^"]"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
	349	content)):
	350	yield self.url_result(
	351	'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
	352
	353	def _real_extract(self, url):
	354	playlist_id = self._match_id(url)
	355	webpage = self._download_webpage(url, playlist_id)
	356	title = self._og_search_title(webpage, fatal=False)
	357	return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
	358
	359
	360	class YoutubeIE(YoutubeBaseInfoExtractor):
	361	IE_DESC = 'YouTube.com'
	362	_VALID_URL = r"""(?x)^
	363	(
	364	(?:https?://\|//) # http(s):// or protocol-independent URL
	365	(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/\|
	366	(?:www\.)?deturl\.com/www\.youtube\.com/\|
	367	(?:www\.)?pwnyoutube\.com/\|
	368	(?:www\.)?hooktube\.com/\|
	369	(?:www\.)?yourepeat\.com/\|
	370	tube\.majestyc\.net/\|
	371	(?:(?:www\|dev)\.)?invidio\.us/\|
	372	(?:www\.)?invidiou\.sh/\|
	373	(?:www\.)?invidious\.snopyta\.org/\|
	374	(?:www\.)?invidious\.kabi\.tk/\|
	375	(?:www\.)?vid\.wxzm\.sx/\|
	376	youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
	377	(?:.*?\#/)? # handle anchor (#/) redirect urls
	378	(?: # the various things that can precede the ID:
	379	(?:(?:v\|embed\|e)/(?!videoseries)) # v/ or embed/ or e/
	380	\|(?: # or the v= param in all its forms
	381	(?:(?:watch\|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup\|.php) or nothing (like /?v=xxxx)
	382	(?:\?\|\#!?) # the params delimiter ? or # or #!
	383	(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
	384	v=
	385	)
	386	))
	387	\|(?:
	388	youtu\.be\| # just youtu.be/xxxx
	389	vid\.plus\| # or vid.plus/xxxx
	390	zwearz\.com/watch\| # or zwearz.com/watch/xxxx
	391	)/
	392	\|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
	393	)
	394	)? # all until now is optional -> you can pass the naked ID
	395	([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
	396	(?!.*?\blist=
	397	(?:
	398	%(playlist_id)s\| # combined list/video URLs are handled by the playlist IE
	399	WL # WL are handled by the watch later IE
	400	)
	401	)
	402	(?(1).+)? # if we found the ID, everything can follow
	403	$""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
	404	_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
	405	_formats = {
	406	'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
	407	'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
	408	'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
	409	'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
	410	'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
	411	'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
	412	'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	413	'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	414	# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
	415	'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
	416	'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
	417	'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
	418	'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
	419	'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
	420	'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
	421	'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
	422	'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	423	'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	424
	425
	426	# 3D videos
	427	'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
	428	'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
	429	'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
	430	'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
	431	'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
	432	'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
	433	'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
	434
	435	# Apple HTTP Live Streaming
	436	'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
	437	'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
	438	'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
	439	'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
	440	'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
	441	'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
	442	'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
	443	'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
	444
	445	# DASH mp4 video
	446	'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
	447	'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
	448	'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
	449	'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
	450	'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
	451	'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
	452	'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
	453	'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
	454	'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
	455	'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
	456	'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
	457	'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
	458
	459	# Dash mp4 audio
	460	'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
	461	'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
	462	'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
	463	'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
	464	'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
	465	'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
	466	'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
	467
	468	# Dash webm
	469	'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
	470	'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
	471	'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
	472	'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
	473	'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
	474	'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
	475	'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
	476	'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
	477	'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
	478	'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
	479	'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
	480	'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
	481	'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
	482	'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
	483	'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
	484	# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
	485	'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
	486	'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
	487	'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
	488	'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
	489	'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
	490	'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
	491
	492	# Dash webm audio
	493	'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
	494	'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
	495
	496	# Dash webm audio with opus inside
	497	'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
	498	'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
	499	'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
	500

1

# coding: utf-8

2

3

from __future__ import unicode_literals

import itertools

import json

import os.path

import random

import re

import time

import traceback

from .common import InfoExtractor, SearchInfoExtractor

15

from ..jsinterp import JSInterpreter

16

from ..swfinterp import SWFInterpreter

17

from ..compat import (

compat_chr,

compat_HTTPError,

compat_kwargs,

compat_parse_qs,

compat_urllib_parse_unquote,

23

compat_urllib_parse_unquote_plus,

24

compat_urllib_parse_urlencode,

25

compat_urllib_parse_urlparse,

compat_urlparse,

compat_str,

)

from ..utils import (

clean_html,

dict_get,

error_to_compat_str,

ExtractorError,

float_or_none,

get_element_by_attribute,

get_element_by_id,

int_or_none,

mimetype2ext,

orderedSet,

parse_codecs,

parse_duration,

qualities,

remove_quotes,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

try_get,

unescapeHTML,

unified_strdate,

unsmuggle_url,

uppercase_escape,

url_or_none,

urlencode_postdata,

)

class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Known playlist ID prefixes followed by at least 10 ID characters
    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the PREF cookie on .youtube.com so that pages are requested with hl=en."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list of url_result entries (handled by the 'Youtube' extractor), one per video ID."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

        The flow is: fetch the login page (for its hidden form inputs), look up
        the account to obtain a user hash, submit the password, optionally
        submit a two-factor code, and finally fetch the CheckCookie URL.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the login form fields plus the JSON-encoded f_req to url and return the parsed JSON."""
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' of the response
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # The same context list is sent with both the lookup and the password
        # challenge requests; it is only serialized, never mutated.
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'
        login_context = [
            None, None,
            [2, 1, None, 1, service_login_url, None, [], 4],
            1, [None, None, []], None, None, None, True
        ]

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            login_context,
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            login_context,
        ]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesized so that the prefix is kept for every message, not
            # only for the INCORRECT_ANSWER_ENTERED case.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        ' (Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as for the password error above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Any other challenge cannot be solved here; tell the user why
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation with disable_polymer=true added to the query."""
        # Copy so that a caller-supplied query dict is not mutated
        query = kwargs.get('query', {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; does nothing without a downloader."""
        if self._downloader is None:
            return
        self._set_language()
        # A failed login is not fatal here: _login has already emitted a warning
        if not self._login():
            return

class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of page and of every continuation reachable via "Load more".

        Each chunk of HTML is handed to self._process_page (not defined in this
        class). The continuation URL is read from the data-uix-load-more-href
        attribute of the most recent load-more widget HTML.
        """
        load_more_re = r'data-uix-load-more-href="/?(?P<more>[^"]+)"'
        max_retries = 3
        widget_html = content_html = page
        for page_num in itertools.count(1):
            for entry in self._process_page(content_html):
                yield entry

            more_match = re.search(load_more_re, widget_html)
            if more_match is None:
                return

            more_url = 'https://youtube.com/%s' % more_match.group('more')
            for attempt in range(max_retries + 1):
                retry_note = ' (retry #%d)' % attempt if attempt else ''
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    more = self._download_json(
                        more_url, playlist_id,
                        'Downloading page #%s%s' % (page_num, retry_note),
                        transform_source=uppercase_escape)
                    break
                except ExtractorError as err:
                    is_transient = (
                        isinstance(err.cause, compat_HTTPError)
                        and err.cause.code in (500, 503))
                    # Give up on other errors, or once the retries are used up
                    if not is_transient or attempt == max_retries:
                        raise

            content_html = more['content_html']
            if not content_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = more['load_more_widget_html']

317

318

319

class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Base class for listings whose pages contain links to single videos.

    ``extract_videos_from_page`` reads ``self._VIDEO_RE``, which is not
    defined in this class.  The code below accesses the named groups
    ``id`` and ``title`` of each match and checks for an optional
    ``index`` group.
    """

    def _process_page(self, content):
        """Yield one ``url_result`` per distinct video found in ``content``."""
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page(self, page):
        """Return ``(video_id, title)`` pairs for the videos linked in ``page``.

        Each video id appears once, in order of first occurrence.  When an
        id is matched several times, the first non-empty title seen for it
        is kept.  The result is whatever ``zip`` returns on the running
        interpreter.
        """
        ids_in_page = []
        titles_in_page = []
        # Maps a video id to its position in the two lists above, so that
        # repeated ids are found without rescanning ``ids_in_page``.
        position_by_id = {}
        for mobj in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): this compares the 'id' group, not 'index', to
            # '0' -- left as-is because _VIDEO_RE is not visible here;
            # confirm which group was intended.
            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                continue
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
            if video_title:
                video_title = video_title.strip()
            idx = position_by_id.get(video_id)
            if idx is None:
                position_by_id[video_id] = len(ids_in_page)
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
            elif video_title and not titles_in_page[idx]:
                # Fill in a title that the earlier match was missing.
                titles_in_page[idx] = video_title
        return zip(ids_in_page, titles_in_page)

343

344

345

class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Base class for pages that list playlists rather than videos."""

    def _process_page(self, content):
        """Yield a ``YoutubePlaylist`` url_result per playlist link in ``content``.

        The ids captured by the regex are passed through ``orderedSet``
        before being turned into playlist URLs.
        """
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for playlist_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download ``url`` and return a playlist result of its entries.

        The title comes from ``_og_search_title`` with ``fatal=False``, so a
        missing title does not abort extraction.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)

358

359

360

class YoutubeIE(YoutubeBaseInfoExtractor):

361

IE_DESC = 'YouTube.com'

362

_VALID_URL = r"""(?x)^

363

(

364

(?:https?://|//) # http(s):// or protocol-independent URL

365

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|

366

(?:www\.)?deturl\.com/www\.youtube\.com/|

367

(?:www\.)?pwnyoutube\.com/|

368

(?:www\.)?hooktube\.com/|

369

(?:www\.)?yourepeat\.com/|

370

tube\.majestyc\.net/|

371

(?:(?:www|dev)\.)?invidio\.us/|

372

(?:www\.)?invidiou\.sh/|

373

(?:www\.)?invidious\.snopyta\.org/|

374

(?:www\.)?invidious\.kabi\.tk/|

375

(?:www\.)?vid\.wxzm\.sx/|

376

youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains

377

(?:.*?\#/)? # handle anchor (#/) redirect urls

378

(?: # the various things that can precede the ID:

379

(?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/

380

|(?: # or the v= param in all its forms

381

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

382

(?:\?|\#!?) # the params delimiter ? or # or #!

383

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

389

vid\.plus| # or vid.plus/xxxx

390

zwearz\.com/watch| # or zwearz.com/watch/xxxx

391

)/

392

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

393

)

394

)? # all until now is optional -> you can pass the naked ID

395

([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

396

(?!.*?\blist=

397

(?:

398

%(playlist_id)s| # combined list/video URLs are handled by the playlist IE

399

WL # WL are handled by the watch later IE

400

)

401

)

402

(?(1).+)? # if we found the ID, everything can follow

403

$""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}

404

_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'

405

_formats = {

406

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

407

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

408

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

409

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

410

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

411

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

412

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

413

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

414

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

415

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

416

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

417

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

418

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

419

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

420

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

421

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

422

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

423

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

428

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

429

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

430

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

431

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

432

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

433

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

434

435

# Apple HTTP Live Streaming

436

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

437

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

438

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

439

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

440

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

441

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

442

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

443

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

444

445

# DASH mp4 video

446

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

447

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

448

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

449

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

450

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

451

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

452

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

453

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

454

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

455

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

456

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

457

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

458

459

# Dash mp4 audio

460

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

461

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

462

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

463

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

464

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

465

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

466

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

467

468

# Dash webm

469

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

470

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

471

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

472

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

473

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

474

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

475

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

476

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

477

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

478

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

479

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

480

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

481

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

482

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

483

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

484

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

485

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

486

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

487

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

488

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

489

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

490

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

491

492

# Dash webm audio

493

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

494

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

495

496

# Dash webm audio with opus inside

497

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

498

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

499

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

500

501

# RTMP (unnamed)

502

'_rtmp': {'protocol': 'rtmp'},

503

504

# av01 video only formats sometimes served with "unknown" codecs

505

'394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},

506

'395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},

507

'396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},

508

'397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},

509

}

510

_SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

522

'uploader': 'Philipp Hagemeister',

523

'uploader_id': 'phihag',

524

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

525

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

526

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

527

'upload_date': '20121002',

528

'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',

529

'categories': ['Science & Technology'],

530

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'dislike_count': int,

'start_time': 1,

'end_time': 9,

}

},

{

'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',

541

'note': 'Test generic use_cipher_signature video (#897)',

'info_dict': {

'id': 'UxxajLWwzqY',

'ext': 'mp4',

'upload_date': '20120506',

546

'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',

547

'alt_title': 'I Love It (feat. Charli XCX)',

548

'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',

549

'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',

550

'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',

551

'iconic ep', 'iconic', 'love', 'it'],

552

'duration': 180,

553

'uploader': 'Icona Pop',

554

'uploader_id': 'IconaPop',

555

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',

556

'creator': 'Icona Pop',

557

'track': 'I Love It (feat. Charli XCX)',

558

'artist': 'Icona Pop',

}

},

{

'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',

563

'note': 'Test VEVO video with age protection (#956)',

'info_dict': {

'id': '07FYdnEawAQ',

'ext': 'mp4',

'upload_date': '20130703',

568

'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',

569

'alt_title': 'Tunnel Vision',

570

'description': 'md5:07dab3356cde4199048e4c7cd93471e1',

571

'duration': 419,

572

'uploader': 'justintimberlakeVEVO',

573

'uploader_id': 'justintimberlakeVEVO',

574

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',

575

'creator': 'Justin Timberlake',

576

'track': 'Tunnel Vision',

577

'artist': 'Justin Timberlake',

'age_limit': 18,

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

583

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

588

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

589

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

590

'uploader': 'SET India',

591

'uploader_id': 'setindia',

592

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

'age_limit': 18,

}

},

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',

598

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

603

'uploader': 'Philipp Hagemeister',

604

'uploader_id': 'phihag',

605

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

606

'upload_date': '20121002',

607

'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',

608

'categories': ['Science & Technology'],

609

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'dislike_count': int,

614

},

615

'params': {

616

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

621

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

626

'uploader_id': '8KVIDEO',

627

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

628

'description': '',

629

'uploader': '8KVIDEO',

630

'title': 'UHDTV TEST 8K VIDEO.mp4'

631

},

632

'params': {

633

'youtube_include_dash_manifest': True,

634

'format': '141',

635

},

636

'skip': 'format 141 not served anymore',

637

},

638

# DASH manifest with encrypted signature

639

{

640

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

645

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

646

'duration': 244,

647

'uploader': 'AfrojackVEVO',

648

'uploader_id': 'AfrojackVEVO',

649

'upload_date': '20131011',

650

},

651

'params': {

652

'youtube_include_dash_manifest': True,

653

'format': '141/bestaudio[ext=m4a]',

654

},

655

},

656

# JS player signature function name containing $

657

{

658

'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',

'info_dict': {

'id': 'nfWlot6h_JM',

'ext': 'm4a',

'title': 'Taylor Swift - Shake It Off',

663

'description': 'md5:bec2185232c05479482cb5a9b82719bf',

664

'duration': 242,

665

'uploader': 'TaylorSwiftVEVO',

666

'uploader_id': 'TaylorSwiftVEVO',

667

'upload_date': '20140818',

668

'creator': 'Taylor Swift',

669

},

670

'params': {

671

'youtube_include_dash_manifest': True,

672

'format': '141/bestaudio[ext=m4a]',

},

},

# Controversy video

{

'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',

'info_dict': {

'id': 'T4XJQO3qol8',

'ext': 'mp4',

'duration': 219,

'upload_date': '20100909',

683

'uploader': 'Amazing Atheist',

684

'uploader_id': 'TheAmazingAtheist',

685

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',

686

'title': 'Burning Everyone\'s Koran',

687

'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',

688

}

689

},

690

# Normal age-gate video (No vevo, embed allowed)

691

{

692

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

697

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

698

'duration': 142,

699

'uploader': 'The Witcher',

700

'uploader_id': 'WitcherGame',

701

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

702

'upload_date': '20140605',

'age_limit': 18,

},

},

# Age-gate video with encrypted signature

707

{

708

'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',

'info_dict': {

'id': '6kLq3WMV1nU',

'ext': 'mp4',

'title': 'Dedication To My Ex (Miss That) (Lyric Video)',

713

'description': 'md5:33765bb339e1b47e7e72b5490139bb41',

714

'duration': 246,

715

'uploader': 'LloydVEVO',

716

'uploader_id': 'LloydVEVO',

717

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',

718

'upload_date': '20110629',

'age_limit': 18,

},

},

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

723

# YouTube Red ad is not captured for creator

724

{

725

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

731

'uploader_id': 'deadmau5',

732

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

733

'creator': 'deadmau5',

734

'description': 'md5:12c56784b8032162bb936a5f76d55360',

735

'uploader': 'deadmau5',

736

'title': 'Deadmau5 - Some Chords (HD)',

737

'alt_title': 'Some Chords',

738

},

739

'expected_warnings': [

740

'DASH manifest missing',

741

]

742

},

743

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

744

{

745

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

751

'uploader_id': 'olympic',

752

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

753

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

754

'uploader': 'Olympic',

755

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

756

},

757

'params': {

758

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

768

'duration': 85,

769

'upload_date': '20110310',

770

'uploader_id': 'AllenMeow',

771

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

772

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

773

'uploader': '孫ᄋᄅ',

774

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

775

},

776

},

777

# url_encoded_fmt_stream_map is empty string

778

{

779

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

784

'description': '',

785

'upload_date': '20150404',

786

'uploader_id': 'spbelect',

787

'uploader': 'Наблюдатели Петербурга',

788

},

789

'params': {

790

'skip_download': 'requires avconv',

791

},

792

'skip': 'This live event has ended.',

793

},

794

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

795

{

796

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

801

'description': 'md5:116377fd2963b81ec4ce64b542173306',

802

'duration': 220,

803

'upload_date': '20150625',

804

'uploader_id': 'dorappi2000',

805

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

806

'uploader': 'dorappi2000',

807

'formats': 'mincount:31',

808

},

809

'skip': 'not actual anymore',

810

},

811

# DASH manifest with segment_list

812

{

813

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

814

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

819

'uploader': 'Airtek',

820

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

821

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

822

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

823

},

824

'params': {

825

'youtube_include_dash_manifest': True,

826

'format': '135', # bestvideo

827

},

828

'skip': 'This live event has ended.',

829

},

830

{

831

# Multifeed videos (multiple cameras), URL is for Main Camera

832

'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',

833

'info_dict': {

834

'id': 'jqWvoWXjCVs',

835

'title': 'teamPGP: Rocket League Noob Stream',

836

'description': 'md5:dc7872fb300e143831327f1bae3af010',

},

'playlist': [{

'info_dict': {

'id': 'jqWvoWXjCVs',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',

843

'description': 'md5:dc7872fb300e143831327f1bae3af010',

844

'duration': 7335,

845

'upload_date': '20150721',

846

'uploader': 'Beer Games Beer',

847

'uploader_id': 'beergamesbeer',

848

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',

849

'license': 'Standard YouTube License',

},

}, {

'info_dict': {

'id': '6h8e8xoXJzg',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',

856

'description': 'md5:dc7872fb300e143831327f1bae3af010',

857

'duration': 7337,

858

'upload_date': '20150721',

859

'uploader': 'Beer Games Beer',

860

'uploader_id': 'beergamesbeer',

861

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',

862

'license': 'Standard YouTube License',

},

}, {

'info_dict': {

'id': 'PUOgX5z9xZw',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (grizzle)',

869

'description': 'md5:dc7872fb300e143831327f1bae3af010',

870

'duration': 7337,

871

'upload_date': '20150721',

872

'uploader': 'Beer Games Beer',

873

'uploader_id': 'beergamesbeer',

874

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',

875

'license': 'Standard YouTube License',

},

}, {

'info_dict': {

'id': 'teuwxikvS5k',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (zim)',

882

'description': 'md5:dc7872fb300e143831327f1bae3af010',

883

'duration': 7334,

884

'upload_date': '20150721',

885

'uploader': 'Beer Games Beer',

886

'uploader_id': 'beergamesbeer',

887

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',

888

'license': 'Standard YouTube License',

},

}],

'params': {

'skip_download': True,

893

},

894

'skip': 'This video is not available.',

895

},

896

{

897

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

898

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

899

'info_dict': {

900

'id': 'gVfLd0zydlo',

901

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

902

},

903

'playlist_count': 2,

904

'skip': 'Not multifeed anymore',

905

},

906

{

907

'url': 'https://vid.plus/FlRa-iH7PGw',

908

'only_matching': True,

909

},

910

{

911

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

912

'only_matching': True,

913

},

914

{

915

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

916

# Also tests cut-off URL expansion in video description (see

917

# https://github.com/ytdl-org/youtube-dl/issues/1892,

918

# https://github.com/ytdl-org/youtube-dl/issues/8164)

919

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

924

'alt_title': 'Dark Walk - Position Music',

925

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

926

'duration': 133,

927

'upload_date': '20151119',

928

'uploader_id': 'IronSoulElf',

929

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

930

'uploader': 'IronSoulElf',

931

'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',

932

'track': 'Dark Walk - Position Music',

933

'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',

934

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

935

},

936

'params': {

937

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

942

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

943

'only_matching': True,

944

},

945

{

946

# Video with yt:stretch=17:0

947

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

952

'description': 'md5:ee18a25c350637c8faff806845bddee9',

953

'upload_date': '20151107',

954

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

955

'uploader': 'CH GAMER DROID',

956

},

957

'params': {

958

'skip_download': True,

959

},

960

'skip': 'This video does not exist.',

961

},

962

{

963

# Video licensed under Creative Commons

964

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

969

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

970

'duration': 721,

971

'upload_date': '20150127',

972

'uploader_id': 'BerkmanCenter',

973

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

974

'uploader': 'The Berkman Klein Center for Internet & Society',

975

'license': 'Creative Commons Attribution license (reuse allowed)',

976

},

977

'params': {

978

'skip_download': True,

},

},

{

# Channel-like uploader_url

983

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

988

'description': 'md5:dda0d780d5a6e120758d1711d062a867',

989

'duration': 4060,

990

'upload_date': '20151119',

991

'uploader': 'Bernie Sanders',

992

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

993

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

994

'license': 'Creative Commons Attribution license (reuse allowed)',

995

},

996

'params': {

997

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1002

'only_matching': True,

1003

},

1004

{

1005

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1006

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1007

'only_matching': True,

1008

},

1009

{

1010

# Rental video preview

1011

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1016

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1017

'upload_date': '20150811',

1018

'uploader': 'FlixMatrix',

1019

'uploader_id': 'FlixMatrixKaravan',

1020

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1021

'license': 'Standard YouTube License',

1022

},

1023

'params': {

1024

'skip_download': True,

1025

},

1026

'skip': 'This video is not available.',

1027

},

1028

{

1029

# YouTube Red video with episode data

1030

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1035

'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',

1036

'duration': 2085,

1037

'upload_date': '20170118',

1038

'uploader': 'Vsauce',

1039

'uploader_id': 'Vsauce',

1040

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1041

'series': 'Mind Field',

'season_number': 1,

'episode_number': 1,

},

'params': {

'skip_download': True,

1047

},

1048

'expected_warnings': [

1049

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1054

# as inappropriate or offensive to some audiences.

1055

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1060

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1061

'duration': 965,

1062

'upload_date': '20140124',

1063

'uploader': 'New Century Foundation',

1064

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1065

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1066

},

1067

'params': {

1068

'skip_download': True,

},

},

{

# itag 212

'url': '1t24XAntNCY',

1074

'only_matching': True,

1075

},

1076

{

1077

# geo restricted to JP

1078

'url': 'sJL6WA-aGkQ',

1079

'only_matching': True,

1080

},

1081

{

1082

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

1083

'only_matching': True,

1084

},

1085

{

1086

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1087

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1092

'only_matching': True,

1093

},

1094

{

1095

# Video with unsupported adaptive stream type formats

1096

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1101

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1102

'duration': 433,

1103

'upload_date': '20130923',

1104

'uploader': 'Amelia Putri Harwita',

1105

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1106

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1107

'formats': 'maxcount:10',

1108

},

1109

'params': {

1110

'skip_download': True,

1111

'youtube_include_dash_manifest': False,

},

},

{

# Youtube Music Auto-generated description

1116

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1121

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1122

'upload_date': '20190312',

1123

'uploader': 'Various Artists - Topic',

1124

'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',

1125

'artist': 'Stephen',

1126

'track': 'Voyeur Girl',

1127

'album': 'it\'s too much love to know my dear',

1128

'release_date': '20190313',

1129

'release_year': 2019,

1130

},

1131

'params': {

1132

'skip_download': True,

},

},

{

# Youtube Music Auto-generated description

1137

# Retrieve 'artist' field from 'Artist:' in video description

1138

# when it is present on youtube music video

1139

'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',

'info_dict': {

'id': 'k0jLE7tTwjY',

'ext': 'mp4',

'title': 'Latch Feat. Sam Smith',

1144

'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',

1145

'upload_date': '20150110',

1146

'uploader': 'Various Artists - Topic',

1147

'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',

1148

'artist': 'Disclosure',

1149

'track': 'Latch Feat. Sam Smith',

1150

'album': 'Latch Featuring Sam Smith',

1151

'release_date': '20121008',

1152

'release_year': 2012,

1153

},

1154

'params': {

1155

'skip_download': True,

},

},

{

# Youtube Music Auto-generated description

1160

# handle multiple artists on youtube music video

1161

'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',

'info_dict': {

'id': '74qn0eJSjpA',

'ext': 'mp4',

'title': 'Eastside',

'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',

1167

'upload_date': '20180710',

1168

'uploader': 'Benny Blanco - Topic',

1169

'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',

1170

'artist': 'benny blanco, Halsey, Khalid',

1171

'track': 'Eastside',

1172

'album': 'Eastside',

1173

'release_date': '20180713',

1174

'release_year': 2018,

1175

},

1176

'params': {

1177

'skip_download': True,

},

},

{

# Youtube Music Auto-generated description

1182

# handle youtube music video with release_year and no release_date

1183

'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',

'info_dict': {

'id': '-hcAI0g-f5M',

'ext': 'mp4',

'title': 'Put It On Me',

1188

'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',

1189

'upload_date': '20180426',

1190

'uploader': 'Matt Maeson - Topic',

1191

'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',

1192

'artist': 'Matt Maeson',

1193

'track': 'Put It On Me',

1194

'album': 'The Hearse',

1195

'release_date': None,

1196

'release_year': 2018,

1197

},

1198

'params': {

1199

'skip_download': True,

},

},

]

def __init__(self, *args, **kwargs):
    """Set up the extractor with an empty signature-function cache.

    All positional and keyword arguments are passed through unchanged to
    the parent class initialiser.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Maps (player URL, signature layout) to a deciphering callable;
    # populated lazily by _decrypt_signature.
    self._player_cache = dict()

1207

1208

def report_video_info_webpage_download(self, video_id):
    """Tell the user that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)

1211

1212

def report_information_extraction(self, video_id):
    """Tell the user that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)

1215

1216

def report_unavailable_format(self, video_id, format):
    """Tell the user that the requested format is not available.

    video_id -- identifier of the video the message refers to
    format   -- the format that could not be found
    """
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)

1219

1220

def report_rtmp_download(self):
    """Tell the user that the download will go through the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)

1223

1224

def _signature_cache_id(self, example_sig):
    """Describe the layout of a signature as dot-joined part lengths.

    The signature is split on '.' and every part is replaced by its
    length, so a signature with parts of 2 and 3 characters gives '2.3'.
    """
    part_lengths = [
        compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)

1227

1228

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that deciphers signatures for the given player.

    video_id    -- video identifier, passed to the download helpers
    player_url  -- URL of the JS or SWF player; the player id and the
                   player type (file extension) are parsed from it
    example_sig -- a sample signature; its layout is part of the cache key
                   and its length sizes the probe string

    The result is looked up in the downloader's 'youtube-sigfuncs' cache
    first. On a miss the player is downloaded, parsed, and the resulting
    index permutation is stored in that cache.

    Raises ExtractorError if the player cannot be identified from its URL
    or has a type other than 'js' or 'swf'.
    """
    id_m = re.match(
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
        player_url)
    if not id_m:
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    # Internal invariant: the cache key must be a plain file name.
    assert os.path.basename(func_id) == func_id

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        # A cached spec is the list of source indices, in output order.
        return lambda s: ''.join(s[i] for i in cache_spec)

    download_note = (
        'Downloading player %s' % player_url
        if self._downloader.params.get('verbose') else
        'Downloading %s player %s' % (player_type, player_id)
    )
    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        # Raise explicitly instead of `assert False`: assertions are
        # stripped under `python -O`, which would let execution fall
        # through to an unbound `res`.
        raise ExtractorError('Invalid player type %r' % player_type)

    # Run the function on a string whose i-th character has code point i;
    # the code points of the output are then the source indices.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res

1273

1274

def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the deciphering function func.

    func is applied to a probe string as long as example_sig in which
    the i-th character has code point i; the code points of the result
    are therefore the source indices of every output character. These
    indices are rendered as a concatenation of index and slice
    expressions on ``s`` and written out through self.to_screen.
    """
    def gen_sig_code(idxs):
        # Yields 's[...]' expression strings covering idxs in order.
        def _genslice(start, end, step):
            # Render the slice text; a start of 0 and a step of 1 are
            # left out, and the end is left out when end + step is
            # negative.
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        # Walk over adjacent (previous, current) index pairs.
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the step is unchanged.
                if i - prev == step:
                    continue
                # The run ended at prev; emit it as one slice.
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run of consecutive indices starts at prev.
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair.
        # NOTE(review): i is only bound if the loop ran at least once,
        # i.e. idxs has two or more entries - confirm callers ensure this.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    # Tuple of part lengths of the example signature, as source text.
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

1312

1313

def _parse_sig_js(self, jscode):
    """Build a signature deciphering callable from JS player code.

    The name of the deciphering function is located with the patterns
    below, tried in the order listed. The function is then extracted with
    JSInterpreter and wrapped so that it takes the signature string as its
    only argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # These patterns need literal parentheses after `.set` and
         # around the inner group, like the patterns above; a bare `$`
         # there anchors at end of input and can never match.
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list.
    return lambda s: initial_function([s])

1331

1332

def _parse_sig_swf(self, file_contents):
    """Build a signature deciphering callable from SWF player contents.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    with SWFInterpreter and wrapped so that it takes the signature string
    as its only argument.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def run_decipher(s):
        # The extracted function takes its arguments as a list.
        return decipher([s])

    return run_decipher

1338

1339

def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Decipher the encrypted signature s with the given player.

    s          -- the encrypted signature
    video_id   -- video identifier, passed on to the download helpers
    player_url -- player URL; may be protocol-relative or relative to
                  https://www.youtube.com
    age_gate   -- accepted but not used by this method

    Deciphering functions are kept in self._player_cache, keyed by player
    URL and signature layout. Raises ExtractorError when player_url is
    None, and wraps any failure while obtaining or running the function
    in an ExtractorError carrying the traceback.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Turn the player URL into an absolute one.
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in self._player_cache:
            decipher = self._player_cache[cache_key]
        else:
            decipher = self._extract_signature_function(
                video_id, player_url, s)
            self._player_cache[cache_key] = decipher
        print_code = self._downloader.params.get('youtube_print_sig_code')
        if print_code:
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as e:
        trace_text = traceback.format_exc()
        raise ExtractorError(
            'Signature extraction failed: ' + trace_text, cause=e)

1365

1366

def _get_subtitles(self, video_id, webpage):
    """Return the subtitle tracks listed for a video.

    The track list is fetched from the timedtext service. The result maps
    each language code to a list of {'url', 'ext'} dicts, one per entry
    of self._SUBTITLE_FORMATS. An empty dict is returned (after a
    warning) when the list cannot be downloaded or contains no track.
    The webpage argument is not used here.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        track_list = self._download_xml(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    subtitles = {}
    for track in track_list.findall('track'):
        lang_code = track.attrib['lang_code']
        if lang_code in subtitles:
            # Only the first track listed for a language is kept.
            continue
        formats = []
        for ext in self._SUBTITLE_FORMATS:
            query = compat_urllib_parse_urlencode({
                'lang': lang_code,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            })
            formats.append({
                'url': 'https://www.youtube.com/api/timedtext?' + query,
                'ext': ext,
            })
        subtitles[lang_code] = formats

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}

def _get_ytplayer_config(self, video_id, webpage):
    """Extract and parse the ``ytplayer.config`` JSON from a watch page.

    Returns the parsed JSON, or None when no config is found in the
    webpage. Parsing is non-fatal, so a malformed config does not raise.
    """
    # User data may contain arbitrary character sequences that can break
    # JSON extraction with a regex, e.g. when '};' is contained the second
    # regex won't capture the whole JSON. The more concrete regex is
    # therefore tried first, as a workaround until proper quoted string
    # handling is implemented (see
    # https://github.com/ytdl-org/youtube-dl/issues/7468,
    # https://github.com/ytdl-org/youtube-dl/pull/7599)
    config_patterns = (
        r';ytplayer\.config\s*=\s*({.+?});ytplayer',
        r';ytplayer\.config\s*=\s*({.+?});',
    )
    raw_config = self._search_regex(
        config_patterns, webpage, 'ytplayer.config', default=None)
    if not raw_config:
        return None
    return self._parse_json(
        uppercase_escape(raw_config), video_id, fatal=False)

1415

1416

def _get_automatic_captions(self, video_id, webpage):
    """Return the automatic captions available for a video.

    The already downloaded webpage is passed in because the caption URL
    is read from the player config embedded in it, which avoids fetching
    the page again.

    The result maps each target language code to a list of
    {'url', 'ext'} dicts, one per entry of self._SUBTITLE_FORMATS. An
    empty dict is returned (after a warning) when no captions are found.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    player_config = self._get_ytplayer_config(video_id, webpage)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not player_config:
        self._downloader.report_warning(err_msg)
        return {}
    try:
        args = player_config['args']
        caption_url = args.get('ttsurl')
        if caption_url:
            timestamp = args['timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse_urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            caption_list = self._download_xml(list_url, video_id)
            # The first <track> element is taken as the original language.
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')

            # One entry per <target> element in the caption list.
            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                sub_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    params = compat_urllib_parse_urlencode({
                        'lang': original_lang,
                        'tlang': sub_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    })
                    sub_formats.append({
                        'url': caption_url + '&' + params,
                        'ext': ext,
                    })
                sub_lang_list[sub_lang] = sub_formats
            return sub_lang_list

        def make_captions(sub_url, sub_langs):
            # Build the caption URLs for every language in sub_langs by
            # overriding 'tlang' and 'fmt' in the query of sub_url.
            parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
            caption_qs = compat_parse_qs(parsed_sub_url.query)
            captions = {}
            for sub_lang in sub_langs:
                sub_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    caption_qs.update({
                        'tlang': [sub_lang],
                        'fmt': [ext],
                    })
                    sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                        query=compat_urllib_parse_urlencode(caption_qs, True)))
                    sub_formats.append({
                        'url': sub_url,
                        'ext': ext,
                    })
                captions[sub_lang] = sub_formats
            return captions

        # New captions format as of 22.06.2017
        player_response = args.get('player_response')
        if player_response and isinstance(player_response, compat_str):
            player_response = self._parse_json(
                player_response, video_id, fatal=False)
            if player_response:
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                # The base URL of the first caption track is used for all
                # translation languages.
                base_url = renderer['captionTracks'][0]['baseUrl']
                sub_lang_list = []
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                    if lang_code:
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        # Not used anymore as of 22.06.2017
        caption_tracks = args['caption_tracks']
        caption_translation_languages = args['caption_translation_languages']
        caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
        sub_lang_list = []
        for lang in caption_translation_languages.split(','):
            lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
            sub_lang = lang_qs.get('lc', [None])[0]
            if sub_lang:
                sub_lang_list.append(sub_lang)
        return make_captions(caption_url, sub_lang_list)
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, IndexError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}

1517

1518

def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback tracking URL so the video counts as watched.

    The URL is taken from player_response when present, otherwise from
    video_info. Nothing happens when neither provides a valid URL. The
    request itself is non-fatal.
    """
    tracking_url = try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])
    if not tracking_url:
        tracking_url = try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0])
    playback_url = url_or_none(tracking_url)
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = []
    for _ in range(0, 16):
        cpn_chars.append(CPN_ALPHABET[random.randint(0, 256) & 63])
    cpn = ''.join(cpn_chars)

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=encoded_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

1543

1544

@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in an arbitrary webpage.

    Returns a list with, in this order: the HTML-unescaped URLs of
    embedded players, the HTML-unescaped ids of lazyYT embeds, and the
    video ids of Wordpress "YouTube Video Importer" plugin players.
    """
    found = []

    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    for embed in re.finditer(embed_re, webpage):
        found.append(unescapeHTML(embed.group('url')))

    # lazyYT YouTube embed
    lazy_ids = re.findall(
        r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for lazy_id in lazy_ids:
        found.append(unescapeHTML(lazy_id))

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for groups in re.findall(importer_re, webpage):
        # The video id is the last captured group.
        found.append(groups[-1])

    return found

@staticmethod
def _extract_url(webpage):
    """Return the first YouTube embed found in webpage, or None."""
    candidates = YoutubeIE._extract_urls(webpage)
    if not candidates:
        return None
    return candidates[0]

1580

1581

@classmethod
def extract_id(cls, url):
    """Return the video id contained in url.

    The url is matched against cls._VALID_URL in verbose mode and the
    second capture group is returned. Raises ExtractorError when the url
    does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)

1588

1589

def _extract_annotations(self, video_id):
    """Download the annotations of a video.

    Returns whatever self._download_webpage returns for the request; the
    download is non-fatal.
    """
    annotation_query = {
        'features': 1,
        'legacy': 1,
        'video_id': video_id,
    }
    return self._download_webpage(
        'https://www.youtube.com/annotations_invideo', video_id,
        note='Downloading annotations',
        errnote='Unable to download video annotations', fatal=False,
        query=annotation_query)

@staticmethod

def _extract_chapters(description, duration):

1602

if not description:

1603

return None

1604

chapter_lines = re.findall(

1605

r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',

1606

description)

1607

if not chapter_lines:

1608

return None

1609

chapters = []

1610

for next_num, (chapter_line, time_point) in enumerate(

1611

chapter_lines, start=1):

1612

start_time = parse_duration(time_point)

1613

if start_time is None:

1614

continue

1615

if start_time > duration:

1616

break

1617

end_time = (duration if next_num == len(chapter_lines)

1618

else parse_duration(chapter_lines[next_num][1]))

1619

if end_time is None:

1620

continue

1621

if end_time > duration:

1622

end_time = duration

1623

if start_time > end_time:

1624

break

1625

chapter_title = re.sub(

1626

r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')

1627

chapter_title = re.sub(r'\s+', ' ', chapter_title)

1628

chapters.append({

1629

'start_time': start_time,

1630

'end_time': end_time,

1631

'title': chapter_title,

})

return chapters

def _real_extract(self, url):

1636

url, smuggled_data = unsmuggle_url(url, {})

1637

1638

proto = (

1639

'http' if self._downloader.params.get('prefer_insecure', False)

else 'https')

start_time = None

end_time = None

parsed_url = compat_urllib_parse_urlparse(url)

1645

for component in [parsed_url.fragment, parsed_url.query]:

1646

query = compat_parse_qs(component)

1647

if start_time is None and 't' in query:

1648

start_time = parse_duration(query['t'][0])

1649

if start_time is None and 'start' in query:

1650

start_time = parse_duration(query['start'][0])

1651

if end_time is None and 'end' in query:

1652

end_time = parse_duration(query['end'][0])

1653

1654

# Extract original video URL from URL with redirection, like age verification, using next_url parameter

1655

mobj = re.search(self._NEXT_URL_RE, url)

1656

if mobj:

1657

url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')

1658

video_id = self.extract_id(url)

1659

1660

# Get video webpage

1661

url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id

1662

video_webpage = self._download_webpage(url, video_id)

1663

1664

# Attempt to extract SWF player URL

1665

mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)

1666

if mobj is not None:

1667

player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))

else:

player_url = None

dash_mpds = []

def add_dash_mpd(video_info):

1674

dash_mpd = video_info.get('dashmpd')

1675

if dash_mpd and dash_mpd[0] not in dash_mpds:

1676

dash_mpds.append(dash_mpd[0])

1677

1678

def add_dash_mpd_pr(pl_response):

1679

dash_mpd = url_or_none(try_get(

1680

pl_response, lambda x: x['streamingData']['dashManifestUrl'],

1681

compat_str))

1682

if dash_mpd and dash_mpd not in dash_mpds:

1683

dash_mpds.append(dash_mpd)

is_live = None

view_count = None

def extract_view_count(v_info):

1689

return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))

1690

1691

def extract_token(v_info):

1692

return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))

player_response = {}

# Get video info

embed_webpage = None

if re.search(r'player-age-gate-content">', video_webpage) is not None:

1699

age_gate = True

1700

# We simulate the access to the video from www.youtube.com/v/{video_id}

1701

# this can be viewed without login into Youtube

1702

url = proto + '://www.youtube.com/embed/%s' % video_id

1703

embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')

1704

data = compat_urllib_parse_urlencode({

1705

'video_id': video_id,

1706

'eurl': 'https://youtube.googleapis.com/v/' + video_id,

1707

'sts': self._search_regex(

1708

r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),

1709

})

1710

video_info_url = proto + '://www.youtube.com/get_video_info?' + data

1711

video_info_webpage = self._download_webpage(

1712

video_info_url, video_id,

1713

note='Refetching age-gated info webpage',

1714

errnote='unable to download video info webpage')

1715

video_info = compat_parse_qs(video_info_webpage)

1716

add_dash_mpd(video_info)

else:

age_gate = False

video_info = None

sts = None

# Try looking directly into the video webpage

1722

ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)

1723

if ytplayer_config:

1724

args = ytplayer_config['args']

1725

if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):

1726

# Convert to the same format returned by compat_parse_qs

1727

video_info = dict((k, [v]) for k, v in args.items())

1728

add_dash_mpd(video_info)

1729

# Rental video is not rented but preview is available (e.g.

1730

# https://www.youtube.com/watch?v=yYr8q0y5Jfg,

1731

# https://github.com/ytdl-org/youtube-dl/issues/10532)

1732

if not video_info and args.get('ypc_vid'):

1733

return self.url_result(

1734

args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])

1735

if args.get('livestream') == '1' or args.get('live_playback') == 1:

1736

is_live = True

1737

sts = ytplayer_config.get('sts')

1738

if not player_response:

1739

pl_response = str_or_none(args.get('player_response'))

1740

if pl_response:

1741

pl_response = self._parse_json(pl_response, video_id, fatal=False)

1742

if isinstance(pl_response, dict):

1743

player_response = pl_response

1744

if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):

1745

add_dash_mpd_pr(player_response)

1746

# We also try looking in get_video_info since it may contain different dashmpd

1747

# URL that points to a DASH manifest with possibly different itag set (some itags

1748

# are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH

1749

# manifest pointed by get_video_info's dashmpd).

1750

# The general idea is to take a union of itags of both DASH manifests (for example

1751

# video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)

1752

self.report_video_info_webpage_download(video_id)

1753

for el in ('embedded', 'detailpage', 'vevo', ''):

1754

query = {

1755

'video_id': video_id,

'ps': 'default',

'eurl': '',

'gl': 'US',

'hl': 'en',

}

if el:

query['el'] = el

if sts:

query['sts'] = sts

video_info_webpage = self._download_webpage(

1766

'%s://www.youtube.com/get_video_info' % proto,

1767

video_id, note=False,

1768

errnote='unable to download video info webpage',

1769

fatal=False, query=query)

1770

if not video_info_webpage:

1771

continue

1772

get_video_info = compat_parse_qs(video_info_webpage)

1773

if not player_response:

1774

pl_response = get_video_info.get('player_response', [None])[0]

1775

if isinstance(pl_response, dict):

1776

player_response = pl_response

1777

add_dash_mpd_pr(player_response)

1778

add_dash_mpd(get_video_info)

1779

if view_count is None:

1780

view_count = extract_view_count(get_video_info)

1781

if not video_info:

1782

video_info = get_video_info

1783

get_token = extract_token(get_video_info)

1784

if get_token:

1785

# Different get_video_info requests may report different results, e.g.

1786

# some may report video unavailability, but some may serve it without

1787

# any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,

1788

# the original webpage as well as el=info and el=embedded get_video_info

1789

# requests report video unavailability due to geo restriction while

1790

# el=detailpage succeeds and returns valid data). This is probably

1791

# due to YouTube measures against IP ranges of hosting providers.

1792

# Working around by preferring the first succeeded video_info containing

1793

# the token if no such video_info yet was found.

1794

token = extract_token(video_info)

1795

if not token:

1796

video_info = get_video_info

1797

break

1798

1799

def extract_unavailable_message():

1800

return self._html_search_regex(

1801

r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',

1802

video_webpage, 'unavailable message', default=None)

1803

1804

if not video_info:

1805

unavailable_message = extract_unavailable_message()

1806

if not unavailable_message:

1807

unavailable_message = 'Unable to extract video data'

1808

raise ExtractorError(

1809

'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)

1810

1811

video_details = try_get(

1812

player_response, lambda x: x['videoDetails'], dict) or {}

1813

1814

# title

1815

if 'title' in video_info:

1816

video_title = video_info['title'][0]

1817

elif 'title' in player_response:

1818

video_title = video_details['title']

1819

else:

1820

self._downloader.report_warning('Unable to extract video title')

video_title = '_'

# description

description_original = video_description = get_element_by_id("eow-description", video_webpage)

1825

if video_description:

1826

1827

def replace_url(m):

1828

redir_url = compat_urlparse.urljoin(url, m.group(1))

1829

parsed_redir_url = compat_urllib_parse_urlparse(redir_url)

1830

if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':

1831

qs = compat_parse_qs(parsed_redir_url.query)

q = qs.get('q')

if q and q[0]:

return q[0]

return redir_url

description_original = video_description = re.sub(r'''(?x)

1838

<a\s+

1839

(?:[a-zA-Z-]+="[^"]*"\s+)*?

1840

(?:title|href)="([^"]+)"\s+

1841

(?:[a-zA-Z-]+="[^"]*"\s+)*?

class="[^"]*"[^>]*>

[^<]+\.{3}\s*

</a>

''', replace_url, video_description)

1846

video_description = clean_html(video_description)

1847

else:

1848

fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)

1849

if fd_mobj:

1850

video_description = unescapeHTML(fd_mobj.group(1))

1851

else:

1852

video_description = ''

1853

1854

if not smuggled_data.get('force_singlefeed', False):

1855

if not self._downloader.params.get('noplaylist'):

1856

multifeed_metadata_list = try_get(

1857

player_response,

1858

lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],

1859

compat_str) or try_get(

1860

video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)

1861

if multifeed_metadata_list:

1862

entries = []

1863

feed_ids = []

1864

for feed in multifeed_metadata_list.split(','):

1865

# Unquote should take place before split on comma (,) since textual

1866

# fields may contain comma as well (see

1867

# https://github.com/ytdl-org/youtube-dl/issues/8536)

1868

feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))

1869

entries.append({

1870

'_type': 'url_transparent',

1871

'ie_key': 'Youtube',

1872

'url': smuggle_url(

1873

'%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),

1874

{'force_singlefeed': True}),

1875

'title': '%s (%s)' % (video_title, feed_data['title'][0]),

1876

})

1877

feed_ids.append(feed_data['id'][0])

1878

self.to_screen(

1879

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

1880

% (', '.join(feed_ids), video_id))

1881

return self.playlist_result(entries, video_id, video_title, video_description)

1882

else:

1883

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

1884

1885

if view_count is None:

1886

view_count = extract_view_count(video_info)

1887

if view_count is None and video_details:

1888

view_count = int_or_none(video_details.get('viewCount'))

1889

1890

# Check for "rental" videos

1891

if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:

1892

raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)

1893

1894

def _extract_filesize(media_url):

1895

return int_or_none(self._search_regex(

1896

r'\bclen[=/](\d+)', media_url, 'filesize', default=None))

1897

1898

if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):

1899

self.report_rtmp_download()

1900

formats = [{

1901

'format_id': '_rtmp',

1902

'protocol': 'rtmp',

1903

'url': video_info['conn'][0],

1904

'player_url': player_url,

1905

}]

1906

elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):

1907

encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]

1908

if 'rtmpe%3Dyes' in encoded_url_map:

1909

raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)

1910

formats_spec = {}

1911

fmt_list = video_info.get('fmt_list', [''])[0]

1912

if fmt_list:

1913

for fmt in fmt_list.split(','):

1914

spec = fmt.split('/')

1915

if len(spec) > 1:

1916

width_height = spec[1].split('x')

1917

if len(width_height) == 2:

1918

formats_spec[spec[0]] = {

1919

'resolution': spec[1],

1920

'width': int_or_none(width_height[0]),

1921

'height': int_or_none(width_height[1]),

1922

}

1923

q = qualities(['small', 'medium', 'hd720'])

1924

streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)

1925

if streaming_formats:

1926

for fmt in streaming_formats:

1927

itag = str_or_none(fmt.get('itag'))

1928

if not itag:

1929

continue

1930

quality = fmt.get('quality')

1931

quality_label = fmt.get('qualityLabel') or quality

1932

formats_spec[itag] = {

1933

'asr': int_or_none(fmt.get('audioSampleRate')),

1934

'filesize': int_or_none(fmt.get('contentLength')),

1935

'format_note': quality_label,

1936

'fps': int_or_none(fmt.get('fps')),

1937

'height': int_or_none(fmt.get('height')),

1938

'quality': q(quality),

1939

# bitrate for itag 43 is always 2147483647

1940

'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,

1941

'width': int_or_none(fmt.get('width')),

1942

}

1943

formats = []

1944

for url_data_str in encoded_url_map.split(','):

1945

url_data = compat_parse_qs(url_data_str)

1946

if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):

1947

continue

1948

stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))

1949

# Unsupported FORMAT_STREAM_TYPE_OTF

1950

if stream_type == 3:

1951

continue

1952

format_id = url_data['itag'][0]

1953

url = url_data['url'][0]

1954

1955

if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):

1956

ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'

1957

jsplayer_url_json = self._search_regex(

1958

ASSETS_RE,

1959

embed_webpage if age_gate else video_webpage,

1960

'JS player URL (1)', default=None)

1961

if not jsplayer_url_json and not age_gate:

1962

# We need the embed website after all

1963

if embed_webpage is None:

1964

embed_url = proto + '://www.youtube.com/embed/%s' % video_id

1965

embed_webpage = self._download_webpage(

1966

embed_url, video_id, 'Downloading embed webpage')

1967

jsplayer_url_json = self._search_regex(

1968

ASSETS_RE, embed_webpage, 'JS player URL')

1969

1970

player_url = json.loads(jsplayer_url_json)

1971

if player_url is None:

1972

player_url_json = self._search_regex(

1973

r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',

1974

video_webpage, 'age gate player URL')

1975

player_url = json.loads(player_url_json)

1976

1977

if 'sig' in url_data:

1978

url += '&signature=' + url_data['sig'][0]

1979

elif 's' in url_data:

1980

encrypted_sig = url_data['s'][0]

1981

1982

if self._downloader.params.get('verbose'):

1983

if player_url is None:

1984

player_version = 'unknown'

1985

player_desc = 'unknown'

1986

else:

1987

if player_url.endswith('swf'):

1988

player_version = self._search_regex(

1989

r'-(.+?)(?:/watch_as3)?\.swf$', player_url,

1990

'flash player', fatal=False)

1991

player_desc = 'flash player %s' % player_version

1992

else:

1993

player_version = self._search_regex(

1994

[r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',

1995

r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],

1996

player_url,

1997

'html5 player', fatal=False)

1998

player_desc = 'html5 player %s' % player_version

1999

2000

parts_sizes = self._signature_cache_id(encrypted_sig)

2001

self.to_screen('{%s} signature length %s, %s' %

2002

(format_id, parts_sizes, player_desc))

2003

2004

signature = self._decrypt_signature(

2005

encrypted_sig, video_id, player_url, age_gate)

2006

sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'

2007

url += '&%s=%s' % (sp, signature)

2008

if 'ratebypass' not in url:

2009

url += '&ratebypass=yes'

2010

2011

dct = {

2012

'format_id': format_id,

2013

'url': url,

2014

'player_url': player_url,

2015

}

2016

if format_id in self._formats:

2017

dct.update(self._formats[format_id])

2018

if format_id in formats_spec:

2019

dct.update(formats_spec[format_id])

2020

2021

# Some itags are not included in DASH manifest thus corresponding formats will

2022

# lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).

2023

# Trying to extract metadata from url_encoded_fmt_stream_map entry.

2024

mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])

2025

width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)

2026

2027

filesize = int_or_none(url_data.get(

2028

'clen', [None])[0]) or _extract_filesize(url)

2029

2030

quality = url_data.get('quality', [None])[0]

2031

2032

more_fields = {

2033

'filesize': filesize,

2034

'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),

2035

'width': width,

2036

'height': height,

2037

'fps': int_or_none(url_data.get('fps', [None])[0]),

2038

'format_note': url_data.get('quality_label', [None])[0] or quality,

2039

'quality': q(quality),

2040

}

2041

for key, value in more_fields.items():

2042

if value:

2043

dct[key] = value

2044

type_ = url_data.get('type', [None])[0]

2045

if type_:

2046

type_split = type_.split(';')

2047

kind_ext = type_split[0].split('/')

2048

if len(kind_ext) == 2:

2049

kind, _ = kind_ext

2050

dct['ext'] = mimetype2ext(type_split[0])

2051

if kind in ('audio', 'video'):

2052

codecs = None

2053

for mobj in re.finditer(

2054

r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):

2055

if mobj.group('key') == 'codecs':

2056

codecs = mobj.group('val')

2057

break

2058

if codecs:

2059

dct.update(parse_codecs(codecs))

2060

if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':

2061

dct['downloader_options'] = {

2062

# Youtube throttles chunks >~10M

2063

'http_chunk_size': 10485760,

}

formats.append(dct)

else:

manifest_url = (

url_or_none(try_get(

player_response,

lambda x: x['streamingData']['hlsManifestUrl'],

2071

compat_str))

2072

or url_or_none(try_get(

2073

video_info, lambda x: x['hlsvp'][0], compat_str)))

2074

if manifest_url:

2075

formats = []

2076

m3u8_formats = self._extract_m3u8_formats(

2077

manifest_url, video_id, 'mp4', fatal=False)

2078

for a_format in m3u8_formats:

2079

itag = self._search_regex(

2080

r'/itag/(\d+)/', a_format['url'], 'itag', default=None)

2081

if itag:

2082

a_format['format_id'] = itag

2083

if itag in self._formats:

2084

dct = self._formats[itag].copy()

2085

dct.update(a_format)

2086

a_format = dct

2087

a_format['player_url'] = player_url

2088

# Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming

2089

a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'

2090

formats.append(a_format)

2091

else:

2092

error_message = clean_html(video_info.get('reason', [None])[0])

2093

if not error_message:

2094

error_message = extract_unavailable_message()

2095

if error_message:

2096

raise ExtractorError(error_message, expected=True)

2097

raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')

2098

2099

# uploader

2100

video_uploader = try_get(

2101

video_info, lambda x: x['author'][0],

2102

compat_str) or str_or_none(video_details.get('author'))

2103

if video_uploader:

2104

video_uploader = compat_urllib_parse_unquote_plus(video_uploader)

2105

else:

2106

self._downloader.report_warning('unable to extract uploader name')

2107

2108

# uploader_id

2109

video_uploader_id = None

2110

video_uploader_url = None

2111

mobj = re.search(

2112

r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',

2113

video_webpage)

2114

if mobj is not None:

2115

video_uploader_id = mobj.group('uploader_id')

2116

video_uploader_url = mobj.group('uploader_url')

2117

else:

2118

self._downloader.report_warning('unable to extract uploader nickname')

2119

2120

channel_id = (

2121

str_or_none(video_details.get('channelId'))

2122

or self._html_search_meta(

2123

'channelId', video_webpage, 'channel id', default=None)

2124

or self._search_regex(

2125

r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',

2126

video_webpage, 'channel id', default=None, group='id'))

2127

channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None

2128

2129

# thumbnail image

2130

# We try first to get a high quality image:

2131

m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',

2132

video_webpage, re.DOTALL)

2133

if m_thumb is not None:

2134

video_thumbnail = m_thumb.group(1)

2135

elif 'thumbnail_url' not in video_info:

2136

self._downloader.report_warning('unable to extract video thumbnail')

2137

video_thumbnail = None

2138

else: # don't panic if we can't find it

2139

video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])

2140

2141

# upload date

2142

upload_date = self._html_search_meta(

2143

'datePublished', video_webpage, 'upload date', default=None)

2144

if not upload_date:

2145

upload_date = self._search_regex(

2146

[r'(?s)id="eow-date.*?>(.*?)</span>',

2147

r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],

2148

video_webpage, 'upload date', default=None)

2149

upload_date = unified_strdate(upload_date)

2150

2151

video_license = self._html_search_regex(

2152

r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',

2153

video_webpage, 'license', default=None)

m_music = re.search(

r'''(?x)

<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*

<ul[^>]*>\s*

<li>(?P<title>.+?)

by (?P<creator>.+?)

(?:

$.+?$|

<a[^>]*

(?:

\bhref=["\']/red[^>]*>| # drop possible

2166

>\s*Listen ad-free with YouTube Red # YouTube Red ad

)

.*?

)?</li

''',

video_webpage)

if m_music:

video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))

2174

video_creator = clean_html(m_music.group('creator'))

2175

else:

2176

video_alt_title = video_creator = None

2177

2178

def extract_meta(field):

2179

return self._html_search_regex(

2180

r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,

2181

video_webpage, field, default=None)

2182

2183

track = extract_meta('Song')

2184

artist = extract_meta('Artist')

2185

album = extract_meta('Album')

2186

2187

# Youtube Music Auto-generated description

2188

release_date = release_year = None

2189

if video_description:

2190

mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)

2191

if mobj:

2192

if not track:

2193

track = mobj.group('track').strip()

2194

if not artist:

2195

artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))

2196

if not album:

2197

album = mobj.group('album'.strip())

2198

release_year = mobj.group('release_year')

2199

release_date = mobj.group('release_date')

2200

if release_date:

2201

release_date = release_date.replace('-', '')

2202

if not release_year:

2203

release_year = int(release_date[:4])

2204

if release_year:

2205

release_year = int(release_year)

2206

2207

m_episode = re.search(

2208

r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',

2209

video_webpage)

2210

if m_episode:

2211

series = unescapeHTML(m_episode.group('series'))

2212

season_number = int(m_episode.group('season'))

2213

episode_number = int(m_episode.group('episode'))

2214

else:

2215

series = season_number = episode_number = None

2216

2217

m_cat_container = self._search_regex(

2218

r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',

2219

video_webpage, 'categories', default=None)

2220

if m_cat_container:

2221

category = self._html_search_regex(

2222

r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',

2223

default=None)

2224

video_categories = None if category is None else [category]

2225

else:

2226

video_categories = None

2227

2228

video_tags = [

2229

unescapeHTML(m.group('content'))

2230

for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]

2231

2232

def _extract_count(count_name):

2233

return str_to_int(self._search_regex(

2234

r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'

2235

% re.escape(count_name),

2236

video_webpage, count_name, default=None))

2237

2238

like_count = _extract_count('like')

2239

dislike_count = _extract_count('dislike')

2240

2241

if view_count is None:

2242

view_count = str_to_int(self._search_regex(

2243

r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,

2244

'view count', default=None))

2245

2246

average_rating = (

2247

float_or_none(video_details.get('averageRating'))

2248

or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))

2249

2250

# subtitles

2251

video_subtitles = self.extract_subtitles(video_id, video_webpage)

2252

automatic_captions = self.extract_automatic_captions(video_id, video_webpage)

2253

2254

video_duration = try_get(

2255

video_info, lambda x: int_or_none(x['length_seconds'][0]))

2256

if not video_duration:

2257

video_duration = int_or_none(video_details.get('lengthSeconds'))

2258

if not video_duration:

2259

video_duration = parse_duration(self._html_search_meta(

2260

'duration', video_webpage, 'video duration'))

2261

2262

# annotations

2263

video_annotations = None

2264

if self._downloader.params.get('writeannotations', False):

2265

video_annotations = self._extract_annotations(video_id)

2266

2267

chapters = self._extract_chapters(description_original, video_duration)

2268

2269

# Look for the DASH manifest

2270

if self._downloader.params.get('youtube_include_dash_manifest', True):

2271

dash_mpd_fatal = True

2272

for mpd_url in dash_mpds:

2273

dash_formats = {}

2274

try:

2275

def decrypt_sig(mobj):

2276

s = mobj.group(1)

2277

dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)

2278

return '/signature/%s' % dec_s

2279

2280

mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)

2281

2282

for df in self._extract_mpd_formats(

2283

mpd_url, video_id, fatal=dash_mpd_fatal,

2284

formats_dict=self._formats):

2285

if not df.get('filesize'):

2286

df['filesize'] = _extract_filesize(df['url'])

2287

# Do not overwrite DASH format found in some previous DASH manifest

2288

if df['format_id'] not in dash_formats:

2289

dash_formats[df['format_id']] = df

2290

# Additional DASH manifests may end up in HTTP Error 403 therefore

2291

# allow them to fail without bug report message if we already have

2292

# some DASH manifest succeeded. This is temporary workaround to reduce

2293

# burst of bug reports until we figure out the reason and whether it

2294

# can be fixed at all.

2295

dash_mpd_fatal = False

2296

except (ExtractorError, KeyError) as e:

2297

self.report_warning(

2298

'Skipping DASH manifest: %r' % e, video_id)

2299

if dash_formats:

2300

# Remove the formats we found through non-DASH, they

2301

# contain less info and it can be wrong, because we use

2302

# fixed values (for example the resolution). See

2303

# https://github.com/ytdl-org/youtube-dl/issues/5774 for an

2304

# example.

2305

formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]

2306

formats.extend(dash_formats.values())

2307

2308

# Check for malformed aspect ratio

2309

stretched_m = re.search(

2310

r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',

2311

video_webpage)

2312

if stretched_m:

2313

w = float(stretched_m.group('w'))

2314

h = float(stretched_m.group('h'))

2315

# yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).

2316

# We will only process correct ratios.

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

2321

f['stretched_ratio'] = ratio

2322

2323

if not formats:

2324

token = extract_token(video_info)

2325

if not token:

2326

if 'reason' in video_info:

2327

if 'The uploader has not made this video available in your country.' in video_info['reason']:

2328

regions_allowed = self._html_search_meta(

2329

'regionsAllowed', video_webpage, default=None)

2330

countries = regions_allowed.split(',') if regions_allowed else None

2331

self.raise_geo_restricted(

2332

msg=video_info['reason'][0], countries=countries)

2333

reason = video_info['reason'][0]

2334

if 'Invalid parameters' in reason:

2335

unavailable_message = extract_unavailable_message()

2336

if unavailable_message:

2337

reason = unavailable_message

2338

raise ExtractorError(

2339

'YouTube said: %s' % reason,

2340

expected=True, video_id=video_id)

2341

else:

2342

raise ExtractorError(

2343

'"token" parameter not in video info for unknown reason',

2344

video_id=video_id)

2345

2346

if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):

2347

raise ExtractorError('This video is DRM protected.', expected=True)

2348

2349

self._sort_formats(formats)

2350

2351

self.mark_watched(video_id, video_info, player_response)

return {

'id': video_id,

'uploader': video_uploader,

2356

'uploader_id': video_uploader_id,

2357

'uploader_url': video_uploader_url,

2358

'channel_id': channel_id,

2359

'channel_url': channel_url,

2360

'upload_date': upload_date,

2361

'license': video_license,

2362

'creator': video_creator or artist,

2363

'title': video_title,

2364

'alt_title': video_alt_title or track,

2365

'thumbnail': video_thumbnail,

2366

'description': video_description,

2367

'categories': video_categories,

2368

'tags': video_tags,

2369

'subtitles': video_subtitles,

2370

'automatic_captions': automatic_captions,

2371

'duration': video_duration,

2372

'age_limit': 18 if age_gate else 0,

2373

'annotations': video_annotations,

2374

'chapters': chapters,

2375

'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,

2376

'view_count': view_count,

2377

'like_count': like_count,

2378

'dislike_count': dislike_count,

2379

'average_rating': average_rating,

2380

'formats': formats,

2381

'is_live': is_live,

2382

'start_time': start_time,

2383

'end_time': end_time,

2384

'series': series,

2385

'season_number': season_number,

2386

'episode_number': episode_number,

'track': track,

'artist': artist,

'album': album,

'release_date': release_date,

2391

'release_year': release_year,

}

class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):

2396

IE_DESC = 'YouTube.com playlists'

2397

_VALID_URL = r"""(?x)(?:

(?:https?://)?

(?:\w+\.)?

(?:

(?:

youtube\.com|

invidio\.us

)

/

(?:

(?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))

2408

\? (?:.*?[&;])*? (?:p|a|list)=

2409

| p/

2410

)|

2411

youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=

2412

)

2413

(

2414

(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}

2415

# Top tracks, they can also include dots

|(?:MC)[\w\.]*

)

.*

|

(%(playlist_id)s)

)""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}

2422

_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'

2423

_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'

2424

IE_NAME = 'youtube:playlist'

2425

_TESTS = [{

2426

'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',

2427

'info_dict': {

2428

'title': 'ytdl test PL',

2429

'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',

},

'playlist_count': 3,

}, {

'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',

2434

'info_dict': {

2435

'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',

2436

'title': 'YDL_Empty_List',

2437

},

2438

'playlist_count': 0,

2439

'skip': 'This playlist is private',

2440

}, {

2441

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

2442

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

2443

'info_dict': {

2444

'title': '29C3: Not my department',

2445

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

2446

},

2447

'playlist_count': 95,

2448

}, {

2449

'note': 'issue #673',

2450

'url': 'PLBB231211A4F62143',

2451

'info_dict': {

2452

'title': '[OLD]Team Fortress 2 (Class-based LP)',

2453

'id': 'PLBB231211A4F62143',

2454

},

2455

'playlist_mincount': 26,

2456

}, {

2457

'note': 'Large playlist',

2458

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

2459

'info_dict': {

2460

'title': 'Uploads from Cauchemar',

2461

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

2462

},

2463

'playlist_mincount': 799,

2464

}, {

2465

'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',

2466

'info_dict': {

2467

'title': 'YDL_safe_search',

2468

'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',

2469

},

2470

'playlist_count': 2,

2471

'skip': 'This playlist is private',

2472

}, {

2473

'note': 'embedded',

2474

'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',

'playlist_count': 4,

'info_dict': {

'title': 'JODA15',

'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',

2479

}

2480

}, {

2481

'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',

2482

'playlist_mincount': 485,

2483

'info_dict': {

2484

'title': '2017 華語最新單曲 (2/24更新)',

2485

'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',

2486

}

2487

}, {

2488

'note': 'Embedded SWF player',

2489

'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',

'playlist_count': 4,

'info_dict': {

'title': 'JODA7',

'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',

2494

}

2495

}, {

2496

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

2497

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

2498

'info_dict': {

2499

'title': 'Uploads from Interstellar Movie',

2500

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

2501

},

2502

'playlist_mincount': 21,

2503

}, {

2504

# Playlist URL that does not actually serve a playlist

2505

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

2510

'uploader': 'STREEM',

2511

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

2512

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

2513

'upload_date': '20150526',

2514

'license': 'Standard YouTube License',

2515

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

2516

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

'dislike_count': int,

2521

},

2522

'params': {

2523

'skip_download': True,

2524

},

2525

'add_ie': [YoutubeIE.ie_key()],

2526

}, {

2527

'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',

'info_dict': {

'id': 'yeWKywCrFtk',

'ext': 'mp4',

'title': 'Small Scale Baler and Braiding Rugs',

2532

'uploader': 'Backus-Page House Museum',

2533

'uploader_id': 'backuspagemuseum',

2534

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',

2535

'upload_date': '20161008',

2536

'license': 'Standard YouTube License',

2537

'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',

2538

'categories': ['Nonprofits & Activism'],

2539

'tags': list,

2540

'like_count': int,

2541

'dislike_count': int,

},

'params': {

'noplaylist': True,

'skip_download': True,

2546

},

2547

}, {

2548

'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',

2549

'only_matching': True,

2550

}, {

2551

'url': 'TLGGrESM50VT6acwMjAyMjAxNw',

2552

'only_matching': True,

2553

}, {

2554

# music album playlist

2555

'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',

2556

'only_matching': True,

2557

}, {

2558

'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',

2559

'only_matching': True,

2560

}]

2561

2562

def _real_initialize(self):
    """Perform the login step by delegating to ``self._login()``."""
    self._login()

2564

2565

def _extract_mix(self, playlist_id):
    """Collect the videos of a YouTube mix and return a playlist result.

    The mixes are generated from a single video; the id of the playlist
    is just 'RD' + video_id, so the last 11 characters of ``playlist_id``
    seed the first watch page. Watch pages are then fetched repeatedly,
    each one starting from the last video id collected so far, until a
    page contributes no video id that has not been seen already.

    The playlist title is read from the last downloaded page.
    """
    # The pattern only depends on the playlist id, so build it once.
    entry_re = r'''(?xs)data-video-username=".*?".*?
               href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id)

    seen_ids = []
    seed_id = playlist_id[-11:]
    for page_num in itertools.count(1):
        watch_url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id)
        webpage = self._download_webpage(
            watch_url, playlist_id,
            'Downloading page {0} of Youtube mix'.format(page_num))
        # Fetch new pages until all the videos are repeated, it seems that
        # there are always 51 unique videos.
        fresh_ids = [
            candidate
            for candidate in orderedSet(re.findall(entry_re, webpage))
            if candidate not in seen_ids]
        if not fresh_ids:
            break
        seen_ids.extend(fresh_ids)
        seed_id = seen_ids[-1]

    entries = self._ids_to_results(seen_ids)

    # Try the known title containers in order and stop at the first
    # non-empty one; if none matches, the result of the last lookup is kept.
    title_span = None
    for class_name in ('playlist-title', 'title long-title', 'title'):
        title_span = get_element_by_attribute('class', class_name, webpage)
        if title_span:
            break

    return self.playlist_result(entries, playlist_id, clean_html(title_span))

2596

2597

def _extract_playlist(self, playlist_id):
    """Download a playlist page and build the playlist result for it.

    Returns a ``(has_videos, playlist)`` tuple. ``playlist`` is the
    result of ``self.playlist_result(...)`` extended with ``uploader``,
    ``uploader_id`` and ``uploader_url``. ``has_videos`` is False only
    when the page has no playlist title and ``self._entries(...)`` yields
    nothing.

    Raises ExtractorError (expected) when an alert on the page says the
    playlist does not exist, is private, or that the parameters are
    invalid. Any other alert, except the language chooser, is reported
    as a warning.
    """
    url = self._TEMPLATE_URL % playlist_id
    page = self._download_webpage(url, playlist_id)

    # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
    alerts = re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page)
    for alert in alerts:
        alert = alert.strip()
        # Check if the playlist exists or is private
        unavailable = re.match(
            r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
        if unavailable:
            reason = unavailable.group('reason')
            message = 'This playlist %s' % reason
            if 'private' in reason:
                message += ', use --username or --netrc to access it'
            raise ExtractorError(message + '.', expected=True)
        if re.match(r'[^<]*Invalid parameters[^<]*', alert):
            raise ExtractorError(
                'Invalid parameters. Maybe URL is incorrect.',
                expected=True)
        if re.match(r'[^<]*Choose your language[^<]*', alert):
            continue
        self.report_warning('Youtube gives an alert message: ' + alert)

    playlist_title = self._html_search_regex(
        r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
        page, 'title', default=None)

    # Common prefix: the first link inside the pl-header-details list.
    uploader_anchor = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
    uploader = self._search_regex(
        r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % uploader_anchor,
        page, 'uploader', default=None)
    owner = re.search(
        r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % uploader_anchor,
        page)
    uploader_id = owner.group('uploader_id') if owner else None
    uploader_url = compat_urlparse.urljoin(url, owner.group('path')) if owner else None

    has_videos = True
    if not playlist_title:
        # Some playlist URLs don't actually serve a playlist (e.g.
        # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
        # so probe for a first entry; an exhausted iterator means no videos.
        nothing = object()
        has_videos = next(self._entries(page, playlist_id), nothing) is not nothing

    playlist = self.playlist_result(
        self._entries(page, playlist_id), playlist_id, playlist_title)
    playlist.update({
        'uploader': uploader,
        'uploader_id': uploader_id,
        'uploader_url': uploader_url,
    })

    return has_videos, playlist

2658

2659

def _check_download_just_video(self, url, playlist_id):
    """Decide whether a playlist URL should be handled as a single video.

    The video id is taken from the ``v`` query parameter or, failing
    that, from a youtu.be / youtube.com/embed path in ``url``.

    Returns a ``(video_id, result)`` tuple:
      * ``(None, None)`` when ``url`` carries no video id;
      * ``(video_id, url_result)`` when the ``noplaylist`` param is set;
      * ``(video_id, None)`` otherwise (the playlist is to be downloaded).
    """
    # Check if it's a video-specific URL
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query.get('v', [None])[0]
    if not video_id:
        video_id = self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
    if not video_id:
        return None, None

    if not self._downloader.params.get('noplaylist'):
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
    return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

2673

2674

def _real_extract(self, url):
    """Extract a playlist, a mix, or a single video from ``url``.

    The playlist id is the first non-empty capture group of
    ``_VALID_URL``. If ``_check_download_just_video`` returns a result,
    that result is returned directly. Ids starting with 'RD', 'UL' or
    'PU' go through ``_extract_mix``. Everything else goes through
    ``_extract_playlist``.

    Raises ExtractorError when ``url`` does not match ``_VALID_URL``.
    """
    # Extract playlist id
    match = re.match(self._VALID_URL, url)
    if match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    playlist_id = match.group(1) or match.group(2)

    video_id, single_video = self._check_download_just_video(url, playlist_id)
    if single_video:
        return single_video

    # Mixes require a custom extraction process
    if playlist_id.startswith(('RD', 'UL', 'PU')):
        return self._extract_mix(playlist_id)

    has_videos, playlist = self._extract_playlist(playlist_id)

    # Some playlist URLs don't actually serve a playlist (see
    # https://github.com/ytdl-org/youtube-dl/issues/10537).
    # Fallback to plain video extraction if there is a video id
    # along with playlist id.
    if video_id and not has_videos:
        return self.url_result(video_id, 'Youtube', video_id=video_id)

    return playlist

2698

2699

2700

class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for /channel/<id> pages."""
    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the playlists or live extractors accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-listing URL for *channel_id*."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Return the channel's uploads, preferably via its uploads playlist."""
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Page-by-page channel listing is capped at 35 pages of 30 items,
        # i.e. 1050 videos in total (see #5778). Work around that by
        # delegating to the playlist extractor when the channel's playlist
        # id can be obtained; otherwise fall back to paging the channel.
        probe_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = False
        if probe_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', probe_page, 'channel id', default=None)
            if not channel_playlist_id:
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    probe_page, 'channel url', default=None)
                if app_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to form the playlist id.
            playlist_id = 'UU' + channel_playlist_id[2:]
            playlist_url = compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_marker = re.search(r'''(?x)
            class="[^"]*?(?:
                channel-header-autogenerated-label|
                yt-channel-title-autogenerated
            )[^"]*"''', channel_page)

        if autogenerated_marker is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe for a first entry; when there is none, surface any alert
        # message found on the page.
        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)

2792

2793

2794

class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user / custom-URL video listings."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    # NOTE(review): _build_template_url() reads the 'user' and 'id' named
    # groups from self._VALID_URL, but no _VALID_URL assignment is visible
    # in this class body; confirm the pattern in effect defines both groups.
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept *url* only if no other Youtube*IE in this module does."""
        # This extractor's regex is too permissive and would also match
        # URLs that belong to the other youtube extractors, so defer to
        # any of them that accepts the URL.
        for name, klass in globals().items():
            if klass is cls:
                continue
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Fill the template with the URL's path kind and id."""
        url_match = re.match(self._VALID_URL, url)
        # Fall back to the 'user' path segment when the group is empty.
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))

2844

2845

2846

class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for <channel or user>/live URLs."""
    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a /live URL to its video, else to the base channel URL."""
        url_match = re.match(self._VALID_URL, url)
        channel_id = url_match.group('id')
        base_url = url_match.group('base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        # Only hand over to the video extractor when the page is typed as
        # a video and carries an 11-character video id.
        is_video_page = (
            page_type.startswith('video') and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if is_video_page:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)

2896

2897

2898

class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the /playlists tab of a user or channel."""
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]

class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base for the search extractors; only overrides _VIDEO_RE."""
    # Captures an 11-character video id from a /watch link and, when
    # present later in the same tag, its title attribute.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'

2930

2931

2932

class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Keyword search extractor (``ytsearch`` prefix)."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string arguments merged into every results request;
    # subclasses override this (e.g. to change the sort order).
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Downloads result pages one after another, following the "Next"
        link, until *n* videos are collected, a page yields no videos, or
        no next link is found. Returns a playlist result holding at most
        *n* entries.

        Raises ExtractorError (expected) when the result page contains a
        search message instead of results.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough videos are collected. Using '>=' here
            # avoids downloading one more page when the total lands exactly
            # on n; that page's results would be truncated away below.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # The last page may have overshot the requested count.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)

2980

2981

2982

class YoutubeSearchDateIE(YoutubeSearchIE):
    """Keyword search variant that adds a date-sort query argument."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}

2987

2988

2989

class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for /results?search_query=... (or ?q=...) URLs."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the results page and return its videos as a playlist."""
        raw_query = re.match(self._VALID_URL, url).group('query')
        # The decoded query serves both as the download label and as the
        # playlist title.
        query = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)

3009

3010

3011

class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for /show/<id> URLs."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the base extractor using the show's /playlists URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)

3028

3029

3030

class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Common base for the feed extractors.
    Each subclass has to provide _FEED_NAME and _PLAYLIST_TITLE.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction happens."""
        self._login()

    def _entries(self, page):
        """Yield results for every not-yet-seen video id across feed pages."""
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = []
        widget_html = listing_html = page
        page_num = 0
        while True:
            page_num += 1
            found = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', listing_html)

            # The 'recommended' feed has an endless 'load more' whose
            # portions repeat the same videos (sometimes reordered), so
            # keep only ids not seen before and stop once a portion brings
            # nothing new.
            fresh_ids = [vid for vid in orderedSet(found) if vid not in seen_ids]
            if not fresh_ids:
                return

            seen_ids.extend(fresh_ids)

            for result in self._ids_to_results(fresh_ids):
                yield result

            more_match = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if not more_match:
                return

            more = self._download_json(
                'https://youtube.com/%s' % more_match.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            listing_html = more['content_html']
            widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and wrap its entries in a playlist."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        page = self._download_webpage(feed_url, self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)

3081

3082

3083

class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the 'WL' (watch later) playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video if one was selected, else the playlist."""
        single_video = self._check_download_just_video(url, 'WL')[1]
        if single_video:
            return single_video
        # Only the playlist half of the (has_videos, playlist) pair is used.
        return self._extract_playlist('WL')[1]

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that resolves the favourites page to its playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the list id on the favourites page and delegate to it."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')

3114

3115

3116

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'

3121

3122

3123

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'

3128

3129

3130

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'history' feed."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'

3135

3136

3137

class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch/attribution URLs cut short and reports the likely cause."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: a matching URL carries no video id to extract."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)

class YoutubeTruncatedIDIE(InfoExtractor):
    """Matches watch URLs whose v= value is only 1 to 10 characters long."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise, naming the incomplete id and the offending URL."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)