]> jfr.im git - yt-dlp.git/blame_incremental - youtube_dlc/extractor/youtube.py
Add PyPI release
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5
6import itertools
7import json
8import os.path
9import random
10import re
11import time
12import traceback
13
14from .common import InfoExtractor, SearchInfoExtractor
15from ..jsinterp import JSInterpreter
16from ..swfinterp import SWFInterpreter
17from ..compat import (
18 compat_chr,
19 compat_HTTPError,
20 compat_kwargs,
21 compat_parse_qs,
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 compat_str,
28)
29from ..utils import (
30 bool_or_none,
31 clean_html,
32 error_to_compat_str,
33 ExtractorError,
34 float_or_none,
35 get_element_by_id,
36 int_or_none,
37 mimetype2ext,
38 parse_codecs,
39 parse_count,
40 parse_duration,
41 remove_quotes,
42 remove_start,
43 smuggle_url,
44 str_or_none,
45 str_to_int,
46 try_get,
47 unescapeHTML,
48 unified_strdate,
49 unsmuggle_url,
50 update_url_query,
51 uppercase_escape,
52 url_or_none,
53 urlencode_postdata,
54 urljoin,
55)
56
57
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints of the Google account sign-in flow used by _login() below.
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of top-level youtube.com path segments that are
    # site features rather than channel/user names.
    _RESERVED_NAMES = (
        r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches prefixed playlist IDs as well as the special lists
    # RDMM (my mix), WL (watch later), LL (liked) and LM (liked music).
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _set_language(self):
        """Set the PREF cookie so YouTube serves pages in English."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Turn a list of video IDs into url_result dicts for the Youtube IE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Keep the documented contract: False signals a failed login.
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST one step of the sign-in flow. f_req is the positional
            # "f.req" JSON payload Google's endpoints expect; the response is
            # JSON after an anti-XSSI prefix that transform_source strips
            # (everything before the first '[').
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Step 1: look up the account to obtain the opaque user hash.
        # The payload layout mirrors what the web sign-in page sends;
        # most slots are unused placeholders.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        # Step 2: submit the password for the looked-up account.
        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Keep the documented contract: False signals a failed login.
            return False

        # A non-empty entry at [0][5] indicates a login error (e.g. bad password).
        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        # If present, the server demands an additional challenge before
        # completing the login (TFA, device verification, ...).
        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # TL token required to address the TFA submission endpoint.
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Users sometimes paste codes with the SMS "G-" prefix.
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Challenges that cannot be solved programmatically; tell the
                # user to resolve them in a browser.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Final step: fetching this URL sets the session cookies; a redirect
        # to myaccount.google.com confirms the login took effect.
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Wrap the base downloader, passing a private copy of the query dict."""
        # Copy so later mutation of kwargs['query'] cannot affect the caller.
        query = kwargs.get('query', {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        """Extract and parse the ytInitialData JSON blob from a watch page.

        Returns the parsed dict, or None if the blob is absent or unparseable.
        """
        config = self._search_regex(
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)

    def _real_initialize(self):
        """Set the language cookie and attempt login before extraction."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return

    # Minimal context sent with every youtubei/v1 (InnerTube) API call.
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Delimits the end of the initial-data assignment in page HTML.
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _call_api(self, ep, query, video_id):
        """POST to the youtubei/v1 API endpoint `ep` and return the JSON response.

        `query` is merged over _DEFAULT_API_DATA to form the request body.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            # Public web client API key, not an account credential.
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        """Parse ytInitialData from `webpage`, raising if it cannot be found."""
        return self._parse_json(
            self._search_regex(
                # Prefer the boundary-anchored match; fall back to the bare
                # pattern if no boundary follows.
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the ytcfg.set(...) config object from `webpage`, or {} / None."""
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)
334
335
336class YoutubeIE(YoutubeBaseInfoExtractor):
337 IE_DESC = 'YouTube.com'
338 _VALID_URL = r"""(?x)^
339 (
340 (?:https?://|//) # http(s):// or protocol-independent URL
341 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
342 (?:www\.)?deturl\.com/www\.youtube\.com/|
343 (?:www\.)?pwnyoutube\.com/|
344 (?:www\.)?hooktube\.com/|
345 (?:www\.)?yourepeat\.com/|
346 tube\.majestyc\.net/|
347 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
348 (?:(?:www|dev)\.)?invidio\.us/|
349 (?:(?:www|no)\.)?invidiou\.sh/|
350 (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
351 (?:www\.)?invidious\.kabi\.tk/|
352 (?:www\.)?invidious\.13ad\.de/|
353 (?:www\.)?invidious\.mastodon\.host/|
354 (?:www\.)?invidious\.zapashcanon\.fr/|
355 (?:www\.)?invidious\.kavin\.rocks/|
356 (?:www\.)?invidious\.tube/|
357 (?:www\.)?invidiou\.site/|
358 (?:www\.)?invidious\.site/|
359 (?:www\.)?invidious\.xyz/|
360 (?:www\.)?invidious\.nixnet\.xyz/|
361 (?:www\.)?invidious\.drycat\.fr/|
362 (?:www\.)?tube\.poal\.co/|
363 (?:www\.)?tube\.connect\.cafe/|
364 (?:www\.)?vid\.wxzm\.sx/|
365 (?:www\.)?vid\.mint\.lgbt/|
366 (?:www\.)?yewtu\.be/|
367 (?:www\.)?yt\.elukerio\.org/|
368 (?:www\.)?yt\.lelux\.fi/|
369 (?:www\.)?invidious\.ggc-project\.de/|
370 (?:www\.)?yt\.maisputain\.ovh/|
371 (?:www\.)?invidious\.13ad\.de/|
372 (?:www\.)?invidious\.toot\.koeln/|
373 (?:www\.)?invidious\.fdn\.fr/|
374 (?:www\.)?watch\.nettohikari\.com/|
375 (?:www\.)?kgg2m7yk5aybusll\.onion/|
376 (?:www\.)?qklhadlycap4cnod\.onion/|
377 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
378 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
379 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
380 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
381 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
382 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
383 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
384 (?:.*?\#/)? # handle anchor (#/) redirect urls
385 (?: # the various things that can precede the ID:
386 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
387 |(?: # or the v= param in all its forms
388 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
389 (?:\?|\#!?) # the params delimiter ? or # or #!
390 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
391 v=
392 )
393 ))
394 |(?:
395 youtu\.be| # just youtu.be/xxxx
396 vid\.plus| # or vid.plus/xxxx
397 zwearz\.com/watch| # or zwearz.com/watch/xxxx
398 )/
399 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
400 )
401 )? # all until now is optional -> you can pass the naked ID
402 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
403 (?!.*?\blist=
404 (?:
405 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
406 WL # WL are handled by the watch later IE
407 )
408 )
409 (?(1).+)? # if we found the ID, everything can follow
410 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
411 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
412 _PLAYER_INFO_RE = (
413 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
414 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
415 )
416 _formats = {
417 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
418 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
419 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
420 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
421 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
422 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
423 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
424 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
425 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
426 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
427 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
428 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
429 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
430 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
431 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
432 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
433 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
434 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
435
436
437 # 3D videos
438 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
439 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
440 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
441 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
442 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
443 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
444 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
445
446 # Apple HTTP Live Streaming
447 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
448 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
449 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
450 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
451 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
452 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
453 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
454 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
455
456 # DASH mp4 video
457 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
458 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
459 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
460 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
461 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
462 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
463 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
464 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
465 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
466 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
467 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
468 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
469
470 # Dash mp4 audio
471 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
472 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
473 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
474 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
475 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
476 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
477 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
478
479 # Dash webm
480 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
481 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
482 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
483 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
484 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
485 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
486 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
487 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
488 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
489 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
490 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
491 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
492 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
493 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
494 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
495 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
496 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
497 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
498 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
499 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
500 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
501 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
502
503 # Dash webm audio
504 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
505 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
506
507 # Dash webm audio with opus inside
508 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
509 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
510 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
511
512 # RTMP (unnamed)
513 '_rtmp': {'protocol': 'rtmp'},
514
515 # av01 video only formats sometimes served with "unknown" codecs
516 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
517 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
518 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
519 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
520 }
521 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
522
523 _GEO_BYPASS = False
524
525 IE_NAME = 'youtube'
526 _TESTS = [
527 {
528 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
529 'info_dict': {
530 'id': 'BaW_jenozKc',
531 'ext': 'mp4',
532 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
533 'uploader': 'Philipp Hagemeister',
534 'uploader_id': 'phihag',
535 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
536 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
537 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
538 'upload_date': '20121002',
539 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
540 'categories': ['Science & Technology'],
541 'tags': ['youtube-dl'],
542 'duration': 10,
543 'view_count': int,
544 'like_count': int,
545 'dislike_count': int,
546 'start_time': 1,
547 'end_time': 9,
548 }
549 },
550 {
551 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
552 'note': 'Embed-only video (#1746)',
553 'info_dict': {
554 'id': 'yZIXLfi8CZQ',
555 'ext': 'mp4',
556 'upload_date': '20120608',
557 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
558 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
559 'uploader': 'SET India',
560 'uploader_id': 'setindia',
561 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
562 'age_limit': 18,
563 }
564 },
565 {
566 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
567 'note': 'Use the first video ID in the URL',
568 'info_dict': {
569 'id': 'BaW_jenozKc',
570 'ext': 'mp4',
571 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
572 'uploader': 'Philipp Hagemeister',
573 'uploader_id': 'phihag',
574 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
575 'upload_date': '20121002',
576 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
577 'categories': ['Science & Technology'],
578 'tags': ['youtube-dl'],
579 'duration': 10,
580 'view_count': int,
581 'like_count': int,
582 'dislike_count': int,
583 },
584 'params': {
585 'skip_download': True,
586 },
587 },
588 {
589 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
590 'note': '256k DASH audio (format 141) via DASH manifest',
591 'info_dict': {
592 'id': 'a9LDPn-MO4I',
593 'ext': 'm4a',
594 'upload_date': '20121002',
595 'uploader_id': '8KVIDEO',
596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
597 'description': '',
598 'uploader': '8KVIDEO',
599 'title': 'UHDTV TEST 8K VIDEO.mp4'
600 },
601 'params': {
602 'youtube_include_dash_manifest': True,
603 'format': '141',
604 },
605 'skip': 'format 141 not served anymore',
606 },
607 # DASH manifest with encrypted signature
608 {
609 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
610 'info_dict': {
611 'id': 'IB3lcPjvWLA',
612 'ext': 'm4a',
613 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
614 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
615 'duration': 244,
616 'uploader': 'AfrojackVEVO',
617 'uploader_id': 'AfrojackVEVO',
618 'upload_date': '20131011',
619 },
620 'params': {
621 'youtube_include_dash_manifest': True,
622 'format': '141/bestaudio[ext=m4a]',
623 },
624 },
625 # Controversy video
626 {
627 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
628 'info_dict': {
629 'id': 'T4XJQO3qol8',
630 'ext': 'mp4',
631 'duration': 219,
632 'upload_date': '20100909',
633 'uploader': 'Amazing Atheist',
634 'uploader_id': 'TheAmazingAtheist',
635 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
636 'title': 'Burning Everyone\'s Koran',
637 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
638 }
639 },
640 # Normal age-gate video (embed allowed)
641 {
642 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
643 'info_dict': {
644 'id': 'HtVdAasjOgU',
645 'ext': 'mp4',
646 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
647 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
648 'duration': 142,
649 'uploader': 'The Witcher',
650 'uploader_id': 'WitcherGame',
651 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
652 'upload_date': '20140605',
653 'age_limit': 18,
654 },
655 },
656 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
657 # YouTube Red ad is not captured for creator
658 {
659 'url': '__2ABJjxzNo',
660 'info_dict': {
661 'id': '__2ABJjxzNo',
662 'ext': 'mp4',
663 'duration': 266,
664 'upload_date': '20100430',
665 'uploader_id': 'deadmau5',
666 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
667 'creator': 'Dada Life, deadmau5',
668 'description': 'md5:12c56784b8032162bb936a5f76d55360',
669 'uploader': 'deadmau5',
670 'title': 'Deadmau5 - Some Chords (HD)',
671 'alt_title': 'This Machine Kills Some Chords',
672 },
673 'expected_warnings': [
674 'DASH manifest missing',
675 ]
676 },
677 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
678 {
679 'url': 'lqQg6PlCWgI',
680 'info_dict': {
681 'id': 'lqQg6PlCWgI',
682 'ext': 'mp4',
683 'duration': 6085,
684 'upload_date': '20150827',
685 'uploader_id': 'olympic',
686 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
687 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
688 'uploader': 'Olympic',
689 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
690 },
691 'params': {
692 'skip_download': 'requires avconv',
693 }
694 },
695 # Non-square pixels
696 {
697 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
698 'info_dict': {
699 'id': '_b-2C3KPAM0',
700 'ext': 'mp4',
701 'stretched_ratio': 16 / 9.,
702 'duration': 85,
703 'upload_date': '20110310',
704 'uploader_id': 'AllenMeow',
705 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
706 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
707 'uploader': '孫ᄋᄅ',
708 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
709 },
710 },
711 # url_encoded_fmt_stream_map is empty string
712 {
713 'url': 'qEJwOuvDf7I',
714 'info_dict': {
715 'id': 'qEJwOuvDf7I',
716 'ext': 'webm',
717 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
718 'description': '',
719 'upload_date': '20150404',
720 'uploader_id': 'spbelect',
721 'uploader': 'Наблюдатели Петербурга',
722 },
723 'params': {
724 'skip_download': 'requires avconv',
725 },
726 'skip': 'This live event has ended.',
727 },
728 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
729 {
730 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
731 'info_dict': {
732 'id': 'FIl7x6_3R5Y',
733 'ext': 'webm',
734 'title': 'md5:7b81415841e02ecd4313668cde88737a',
735 'description': 'md5:116377fd2963b81ec4ce64b542173306',
736 'duration': 220,
737 'upload_date': '20150625',
738 'uploader_id': 'dorappi2000',
739 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
740 'uploader': 'dorappi2000',
741 'formats': 'mincount:31',
742 },
743 'skip': 'not actual anymore',
744 },
745 # DASH manifest with segment_list
746 {
747 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
748 'md5': '8ce563a1d667b599d21064e982ab9e31',
749 'info_dict': {
750 'id': 'CsmdDsKjzN8',
751 'ext': 'mp4',
752 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
753 'uploader': 'Airtek',
754 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
755 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
756 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
757 },
758 'params': {
759 'youtube_include_dash_manifest': True,
760 'format': '135', # bestvideo
761 },
762 'skip': 'This live event has ended.',
763 },
764 {
765 # Multifeed videos (multiple cameras), URL is for Main Camera
766 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
767 'info_dict': {
768 'id': 'jqWvoWXjCVs',
769 'title': 'teamPGP: Rocket League Noob Stream',
770 'description': 'md5:dc7872fb300e143831327f1bae3af010',
771 },
772 'playlist': [{
773 'info_dict': {
774 'id': 'jqWvoWXjCVs',
775 'ext': 'mp4',
776 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
777 'description': 'md5:dc7872fb300e143831327f1bae3af010',
778 'duration': 7335,
779 'upload_date': '20150721',
780 'uploader': 'Beer Games Beer',
781 'uploader_id': 'beergamesbeer',
782 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
783 'license': 'Standard YouTube License',
784 },
785 }, {
786 'info_dict': {
787 'id': '6h8e8xoXJzg',
788 'ext': 'mp4',
789 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
790 'description': 'md5:dc7872fb300e143831327f1bae3af010',
791 'duration': 7337,
792 'upload_date': '20150721',
793 'uploader': 'Beer Games Beer',
794 'uploader_id': 'beergamesbeer',
795 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
796 'license': 'Standard YouTube License',
797 },
798 }, {
799 'info_dict': {
800 'id': 'PUOgX5z9xZw',
801 'ext': 'mp4',
802 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
803 'description': 'md5:dc7872fb300e143831327f1bae3af010',
804 'duration': 7337,
805 'upload_date': '20150721',
806 'uploader': 'Beer Games Beer',
807 'uploader_id': 'beergamesbeer',
808 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
809 'license': 'Standard YouTube License',
810 },
811 }, {
812 'info_dict': {
813 'id': 'teuwxikvS5k',
814 'ext': 'mp4',
815 'title': 'teamPGP: Rocket League Noob Stream (zim)',
816 'description': 'md5:dc7872fb300e143831327f1bae3af010',
817 'duration': 7334,
818 'upload_date': '20150721',
819 'uploader': 'Beer Games Beer',
820 'uploader_id': 'beergamesbeer',
821 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
822 'license': 'Standard YouTube License',
823 },
824 }],
825 'params': {
826 'skip_download': True,
827 },
828 'skip': 'This video is not available.',
829 },
830 {
831 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
832 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
833 'info_dict': {
834 'id': 'gVfLd0zydlo',
835 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
836 },
837 'playlist_count': 2,
838 'skip': 'Not multifeed anymore',
839 },
840 {
841 'url': 'https://vid.plus/FlRa-iH7PGw',
842 'only_matching': True,
843 },
844 {
845 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
846 'only_matching': True,
847 },
848 {
849 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
850 # Also tests cut-off URL expansion in video description (see
851 # https://github.com/ytdl-org/youtube-dl/issues/1892,
852 # https://github.com/ytdl-org/youtube-dl/issues/8164)
853 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
854 'info_dict': {
855 'id': 'lsguqyKfVQg',
856 'ext': 'mp4',
857 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
858 'alt_title': 'Dark Walk - Position Music',
859 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
860 'duration': 133,
861 'upload_date': '20151119',
862 'uploader_id': 'IronSoulElf',
863 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
864 'uploader': 'IronSoulElf',
865 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
866 'track': 'Dark Walk - Position Music',
867 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
868 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
869 },
870 'params': {
871 'skip_download': True,
872 },
873 },
874 {
875 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
876 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
877 'only_matching': True,
878 },
879 {
880 # Video with yt:stretch=17:0
881 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
882 'info_dict': {
883 'id': 'Q39EVAstoRM',
884 'ext': 'mp4',
885 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
886 'description': 'md5:ee18a25c350637c8faff806845bddee9',
887 'upload_date': '20151107',
888 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
889 'uploader': 'CH GAMER DROID',
890 },
891 'params': {
892 'skip_download': True,
893 },
894 'skip': 'This video does not exist.',
895 },
896 {
897 # Video licensed under Creative Commons
898 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
899 'info_dict': {
900 'id': 'M4gD1WSo5mA',
901 'ext': 'mp4',
902 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
903 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
904 'duration': 721,
905 'upload_date': '20150127',
906 'uploader_id': 'BerkmanCenter',
907 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
908 'uploader': 'The Berkman Klein Center for Internet & Society',
909 'license': 'Creative Commons Attribution license (reuse allowed)',
910 },
911 'params': {
912 'skip_download': True,
913 },
914 },
915 {
916 # Channel-like uploader_url
917 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
918 'info_dict': {
919 'id': 'eQcmzGIKrzg',
920 'ext': 'mp4',
921 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
922 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
923 'duration': 4060,
924 'upload_date': '20151119',
925 'uploader': 'Bernie Sanders',
926 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
927 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
928 'license': 'Creative Commons Attribution license (reuse allowed)',
929 },
930 'params': {
931 'skip_download': True,
932 },
933 },
934 {
935 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
936 'only_matching': True,
937 },
938 {
939 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
940 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
941 'only_matching': True,
942 },
943 {
944 # Rental video preview
945 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
946 'info_dict': {
947 'id': 'uGpuVWrhIzE',
948 'ext': 'mp4',
949 'title': 'Piku - Trailer',
950 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
951 'upload_date': '20150811',
952 'uploader': 'FlixMatrix',
953 'uploader_id': 'FlixMatrixKaravan',
954 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
955 'license': 'Standard YouTube License',
956 },
957 'params': {
958 'skip_download': True,
959 },
960 'skip': 'This video is not available.',
961 },
962 {
963 # YouTube Red video with episode data
964 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
965 'info_dict': {
966 'id': 'iqKdEhx-dD4',
967 'ext': 'mp4',
968 'title': 'Isolation - Mind Field (Ep 1)',
969 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
970 'duration': 2085,
971 'upload_date': '20170118',
972 'uploader': 'Vsauce',
973 'uploader_id': 'Vsauce',
974 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
975 'series': 'Mind Field',
976 'season_number': 1,
977 'episode_number': 1,
978 },
979 'params': {
980 'skip_download': True,
981 },
982 'expected_warnings': [
983 'Skipping DASH manifest',
984 ],
985 },
986 {
987 # The following content has been identified by the YouTube community
988 # as inappropriate or offensive to some audiences.
989 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
990 'info_dict': {
991 'id': '6SJNVb0GnPI',
992 'ext': 'mp4',
993 'title': 'Race Differences in Intelligence',
994 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
995 'duration': 965,
996 'upload_date': '20140124',
997 'uploader': 'New Century Foundation',
998 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
999 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1000 },
1001 'params': {
1002 'skip_download': True,
1003 },
1004 },
1005 {
1006 # itag 212
1007 'url': '1t24XAntNCY',
1008 'only_matching': True,
1009 },
1010 {
1011 # geo restricted to JP
1012 'url': 'sJL6WA-aGkQ',
1013 'only_matching': True,
1014 },
1015 {
1016 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1017 'only_matching': True,
1018 },
1019 {
1020 # DRM protected
1021 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1022 'only_matching': True,
1023 },
1024 {
1025 # Video with unsupported adaptive stream type formats
1026 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1027 'info_dict': {
1028 'id': 'Z4Vy8R84T1U',
1029 'ext': 'mp4',
1030 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1031 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1032 'duration': 433,
1033 'upload_date': '20130923',
1034 'uploader': 'Amelia Putri Harwita',
1035 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1036 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1037 'formats': 'maxcount:10',
1038 },
1039 'params': {
1040 'skip_download': True,
1041 'youtube_include_dash_manifest': False,
1042 },
1043 'skip': 'not actual anymore',
1044 },
1045 {
1046 # Youtube Music Auto-generated description
1047 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1048 'info_dict': {
1049 'id': 'MgNrAu2pzNs',
1050 'ext': 'mp4',
1051 'title': 'Voyeur Girl',
1052 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1053 'upload_date': '20190312',
1054 'uploader': 'Stephen - Topic',
1055 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1056 'artist': 'Stephen',
1057 'track': 'Voyeur Girl',
1058 'album': 'it\'s too much love to know my dear',
1059 'release_date': '20190313',
1060 'release_year': 2019,
1061 },
1062 'params': {
1063 'skip_download': True,
1064 },
1065 },
1066 {
1067 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1068 'only_matching': True,
1069 },
1070 {
1071 # invalid -> valid video id redirection
1072 'url': 'DJztXj2GPfl',
1073 'info_dict': {
1074 'id': 'DJztXj2GPfk',
1075 'ext': 'mp4',
1076 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1077 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1078 'upload_date': '20090125',
1079 'uploader': 'Prochorowka',
1080 'uploader_id': 'Prochorowka',
1081 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1082 'artist': 'Panjabi MC',
1083 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1084 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1085 },
1086 'params': {
1087 'skip_download': True,
1088 },
1089 },
1090 {
1091 # empty description results in an empty string
1092 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1093 'info_dict': {
1094 'id': 'x41yOUIvK2k',
1095 'ext': 'mp4',
1096 'title': 'IMG 3456',
1097 'description': '',
1098 'upload_date': '20170613',
1099 'uploader_id': 'ElevageOrVert',
1100 'uploader': 'ElevageOrVert',
1101 },
1102 'params': {
1103 'skip_download': True,
1104 },
1105 },
1106 {
1107 # with '};' inside yt initial data (see [1])
1108 # see [2] for an example with '};' inside ytInitialPlayerResponse
1109 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1110 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1111 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1112 'info_dict': {
1113 'id': 'CHqg6qOn4no',
1114 'ext': 'mp4',
1115 'title': 'Part 77 Sort a list of simple types in c#',
1116 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1117 'upload_date': '20130831',
1118 'uploader_id': 'kudvenkat',
1119 'uploader': 'kudvenkat',
1120 },
1121 'params': {
1122 'skip_download': True,
1123 },
1124 },
1125 {
1126 # another example of '};' in ytInitialData
1127 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1128 'only_matching': True,
1129 },
1130 {
1131 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1132 'only_matching': True,
1133 },
1134 ]
1135
    def __init__(self, *args, **kwargs):
        # Forward all arguments to the base InfoExtractor and set up a
        # per-instance cache mapping (player_url, signature spec) to the
        # extracted decryption function (see _decrypt_signature).
        super(YoutubeIE, self).__init__(*args, **kwargs)
        self._player_cache = {}
1139
1140 def report_video_info_webpage_download(self, video_id):
1141 """Report attempt to download video info webpage."""
1142 self.to_screen('%s: Downloading video info webpage' % video_id)
1143
1144 def report_information_extraction(self, video_id):
1145 """Report attempt to extract video information."""
1146 self.to_screen('%s: Extracting video information' % video_id)
1147
1148 def report_unavailable_format(self, video_id, format):
1149 """Report extracted video URL."""
1150 self.to_screen('%s: Format %s not available' % (video_id, format))
1151
1152 def report_rtmp_download(self):
1153 """Indicate the download will use the RTMP protocol."""
1154 self.to_screen('RTMP download detected')
1155
1156 def _signature_cache_id(self, example_sig):
1157 """ Return a string representation of a signature """
1158 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1159
1160 @classmethod
1161 def _extract_player_info(cls, player_url):
1162 for player_re in cls._PLAYER_INFO_RE:
1163 id_m = re.search(player_re, player_url)
1164 if id_m:
1165 break
1166 else:
1167 raise ExtractorError('Cannot identify player %r' % player_url)
1168 return id_m.group('ext'), id_m.group('id')
1169
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Build the signature-decryption function for the given player.

        The result is cached on disk as a permutation spec (a list of
        source indices) keyed by player type/id and the signature's length
        pattern, so later runs skip downloading and parsing the player.
        """
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id is used as a cache file name, so it must not contain
        # path separators
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Replay the cached permutation without re-parsing the player
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Probe the extracted function with a string of distinct characters
        # to record which input index each output position comes from.
        # NOTE(review): this assumes the function is a pure permutation of
        # its input characters — confirm before relying on the cached spec.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1209
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Used with the youtube_print_sig_code option so the recovered
        permutation can be pasted into static signature-handling code.
        """
        def gen_sig_code(idxs):
            # Compress a list of source indices into slice expressions
            # wherever consecutive indices form a run with stride +/-1.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: extend it, or emit it and reset
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a new run with stride +1 or -1
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or the still-open run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe func with a distinct-character string to recover the
        # permutation it applies (same technique as _extract_signature_function)
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1248
    def _parse_sig_js(self, jscode):
        """Locate the signature function in player JS and return a wrapper.

        Tries a series of regexes (current player layouts first, obsolete
        ones last) to find the function name, then interprets the function
        with JSInterpreter.  Returns a callable mapping a scrambled
        signature string to the deciphered one.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        return lambda s: initial_function([s])
1269
1270 def _parse_sig_swf(self, file_contents):
1271 swfi = SWFInterpreter(file_contents)
1272 TARGET_CLASSNAME = 'SignatureDecipher'
1273 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1274 initial_function = swfi.extract_function(searched_class, 'decipher')
1275 return lambda s: initial_function([s])
1276
1277 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1278 """Turn the encrypted s field into a working signature"""
1279
1280 if player_url is None:
1281 raise ExtractorError('Cannot decrypt signature without player_url')
1282
1283 if player_url.startswith('//'):
1284 player_url = 'https:' + player_url
1285 elif not re.match(r'https?://', player_url):
1286 player_url = compat_urlparse.urljoin(
1287 'https://www.youtube.com', player_url)
1288 try:
1289 player_id = (player_url, self._signature_cache_id(s))
1290 if player_id not in self._player_cache:
1291 func = self._extract_signature_function(
1292 video_id, player_url, s
1293 )
1294 self._player_cache[player_id] = func
1295 func = self._player_cache[player_id]
1296 if self._downloader.params.get('youtube_print_sig_code'):
1297 self._print_sig_code(func, s)
1298 return func(s)
1299 except Exception as e:
1300 tb = traceback.format_exc()
1301 raise ExtractorError(
1302 'Signature extraction failed: ' + tb, cause=e)
1303
    def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
        """Return manual subtitle tracks as {lang: [format dicts]}.

        Queries the legacy timedtext track-list endpoint; when the video
        has a live chat replay, a synthetic 'live_chat' track is added.
        Returns {} (after a warning) when nothing can be listed.
        """
        # NOTE(review): webpage is unused here; kept for call-site compatibility
        try:
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
            return {}

        sub_lang_list = {}
        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            # Keep only the first track seen per language
            if lang in sub_lang_list:
                continue
            sub_formats = []
            for ext in self._SUBTITLE_FORMATS:
                params = compat_urllib_parse_urlencode({
                    'lang': lang,
                    'v': video_id,
                    'fmt': ext,
                    'name': track.attrib['name'].encode('utf-8'),
                })
                sub_formats.append({
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
                    'ext': ext,
                })
            sub_lang_list[lang] = sub_formats
        if has_live_chat_replay:
            # Synthetic track fetched via the youtube_live_chat_replay protocol
            sub_lang_list['live_chat'] = [
                {
                    'video_id': video_id,
                    'ext': 'json',
                    'protocol': 'youtube_live_chat_replay',
                },
            ]
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
            return {}
        return sub_lang_list
1343
1344 def _get_ytplayer_config(self, video_id, webpage):
1345 patterns = (
1346 # User data may contain arbitrary character sequences that may affect
1347 # JSON extraction with regex, e.g. when '};' is contained the second
1348 # regex won't capture the whole JSON. Yet working around by trying more
1349 # concrete regex first keeping in mind proper quoted string handling
1350 # to be implemented in future that will replace this workaround (see
1351 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1352 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1353 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1354 r';ytplayer\.config\s*=\s*({.+?});',
1355 )
1356 config = self._search_regex(
1357 patterns, webpage, 'ytplayer.config', default=None)
1358 if config:
1359 return self._parse_json(
1360 uppercase_escape(config), video_id, fatal=False)
1361
    def _get_automatic_captions(self, video_id, player_response, player_config):
        """Return automatic captions as {lang: [format dicts]}.

        Tries, in order: the legacy ttsurl from the ytplayer.config args,
        the captions data in player_response (the format used since
        22.06.2017), and finally the legacy caption_tracks args.  Returns
        {} (after a warning) when no source yields captions.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not (player_response or player_config):
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config.get('args') if player_config else {}
            caption_url = args.get('ttsurl')
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build per-language format lists by rewriting the
                # tlang/fmt query parameters of a base caption URL
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            if player_response:
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                base_url = renderer['captionTracks'][0]['baseUrl']
                sub_lang_list = []
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                    if lang_code:
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Does not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1458
1459 def _mark_watched(self, video_id, video_info, player_response):
1460 playback_url = url_or_none(try_get(
1461 player_response,
1462 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1463 video_info, lambda x: x['videostats_playback_base_url'][0]))
1464 if not playback_url:
1465 return
1466 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1467 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1468
1469 # cpn generation algorithm is reverse engineered from base.js.
1470 # In fact it works even with dummy cpn.
1471 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1472 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1473
1474 qs.update({
1475 'ver': ['2'],
1476 'cpn': [cpn],
1477 })
1478 playback_url = compat_urlparse.urlunparse(
1479 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1480
1481 self._download_webpage(
1482 playback_url, video_id, 'Marking watched',
1483 'Unable to mark watched', fatal=False)
1484
1485 @staticmethod
1486 def _extract_urls(webpage):
1487 # Embedded YouTube player
1488 entries = [
1489 unescapeHTML(mobj.group('url'))
1490 for mobj in re.finditer(r'''(?x)
1491 (?:
1492 <iframe[^>]+?src=|
1493 data-video-url=|
1494 <embed[^>]+?src=|
1495 embedSWF\(?:\s*|
1496 <object[^>]+data=|
1497 new\s+SWFObject\(
1498 )
1499 (["\'])
1500 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1501 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1502 \1''', webpage)]
1503
1504 # lazyYT YouTube embed
1505 entries.extend(list(map(
1506 unescapeHTML,
1507 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1508
1509 # Wordpress "YouTube Video Importer" plugin
1510 matches = re.findall(r'''(?x)<div[^>]+
1511 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1512 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1513 entries.extend(m[-1] for m in matches)
1514
1515 return entries
1516
1517 @staticmethod
1518 def _extract_url(webpage):
1519 urls = YoutubeIE._extract_urls(webpage)
1520 return urls[0] if urls else None
1521
1522 @classmethod
1523 def extract_id(cls, url):
1524 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1525 if mobj is None:
1526 raise ExtractorError('Invalid URL: %s' % url)
1527 video_id = mobj.group(2)
1528 return video_id
1529
    def _extract_chapters_from_json(self, webpage, video_id, duration):
        """Extract the chapter list from ytInitialData's player bar, if any.

        Returns a list of {'start_time', 'end_time', 'title'} dicts, or
        None when the page has no parseable chapter data.
        """
        if not webpage:
            return
        data = self._extract_yt_initial_data(video_id, webpage)
        if not data or not isinstance(data, dict):
            return
        chapters_list = try_get(
            data,
            lambda x: x['playerOverlays']
                       ['playerOverlayRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['playerBar']
                       ['chapteredPlayerBarRenderer']
                       ['chapters'],
            list)
        if not chapters_list:
            return

        def chapter_time(chapter):
            # Chapter start is given in milliseconds; convert to seconds
            return float_or_none(
                try_get(
                    chapter,
                    lambda x: x['chapterRenderer']['timeRangeStartMillis'],
                    int),
                scale=1000)
        chapters = []
        for next_num, chapter in enumerate(chapters_list, start=1):
            start_time = chapter_time(chapter)
            if start_time is None:
                continue
            # End of this chapter is the start of the next one (or the
            # total video duration for the last chapter); next_num is
            # already this chapter's index + 1 because enumerate starts at 1
            end_time = (chapter_time(chapters_list[next_num])
                        if next_num < len(chapters_list) else duration)
            if end_time is None:
                continue
            title = try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': title,
            })
        return chapters
1574
1575 @staticmethod
1576 def _extract_chapters_from_description(description, duration):
1577 if not description:
1578 return None
1579 chapter_lines = re.findall(
1580 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1581 description)
1582 if not chapter_lines:
1583 return None
1584 chapters = []
1585 for next_num, (chapter_line, time_point) in enumerate(
1586 chapter_lines, start=1):
1587 start_time = parse_duration(time_point)
1588 if start_time is None:
1589 continue
1590 if start_time > duration:
1591 break
1592 end_time = (duration if next_num == len(chapter_lines)
1593 else parse_duration(chapter_lines[next_num][1]))
1594 if end_time is None:
1595 continue
1596 if end_time > duration:
1597 end_time = duration
1598 if start_time > end_time:
1599 break
1600 chapter_title = re.sub(
1601 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1602 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1603 chapters.append({
1604 'start_time': start_time,
1605 'end_time': end_time,
1606 'title': chapter_title,
1607 })
1608 return chapters
1609
1610 def _extract_chapters(self, webpage, description, video_id, duration):
1611 return (self._extract_chapters_from_json(webpage, video_id, duration)
1612 or self._extract_chapters_from_description(description, duration))
1613
1614 def _real_extract(self, url):
1615 url, smuggled_data = unsmuggle_url(url, {})
1616
1617 proto = (
1618 'http' if self._downloader.params.get('prefer_insecure', False)
1619 else 'https')
1620
1621 start_time = None
1622 end_time = None
1623 parsed_url = compat_urllib_parse_urlparse(url)
1624 for component in [parsed_url.fragment, parsed_url.query]:
1625 query = compat_parse_qs(component)
1626 if start_time is None and 't' in query:
1627 start_time = parse_duration(query['t'][0])
1628 if start_time is None and 'start' in query:
1629 start_time = parse_duration(query['start'][0])
1630 if end_time is None and 'end' in query:
1631 end_time = parse_duration(query['end'][0])
1632
1633 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1634 mobj = re.search(self._NEXT_URL_RE, url)
1635 if mobj:
1636 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1637 video_id = self.extract_id(url)
1638
1639 # Get video webpage
1640 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1641 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1642
1643 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1644 video_id = qs.get('v', [None])[0] or video_id
1645
1646 # Attempt to extract SWF player URL
1647 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1648 if mobj is not None:
1649 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1650 else:
1651 player_url = None
1652
1653 dash_mpds = []
1654
1655 def add_dash_mpd(video_info):
1656 dash_mpd = video_info.get('dashmpd')
1657 if dash_mpd and dash_mpd[0] not in dash_mpds:
1658 dash_mpds.append(dash_mpd[0])
1659
1660 def add_dash_mpd_pr(pl_response):
1661 dash_mpd = url_or_none(try_get(
1662 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1663 compat_str))
1664 if dash_mpd and dash_mpd not in dash_mpds:
1665 dash_mpds.append(dash_mpd)
1666
1667 is_live = None
1668 view_count = None
1669
1670 def extract_view_count(v_info):
1671 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1672
1673 def extract_player_response(player_response, video_id):
1674 pl_response = str_or_none(player_response)
1675 if not pl_response:
1676 return
1677 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1678 if isinstance(pl_response, dict):
1679 add_dash_mpd_pr(pl_response)
1680 return pl_response
1681
1682 def extract_embedded_config(embed_webpage, video_id):
1683 embedded_config = self._search_regex(
1684 r'setConfig\(({.*})\);',
1685 embed_webpage, 'ytInitialData', default=None)
1686 if embedded_config:
1687 return embedded_config
1688
1689 video_info = {}
1690 player_response = {}
1691 ytplayer_config = None
1692 embed_webpage = None
1693
1694 # Get video info
1695 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1696 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1697 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1698 age_gate = True
1699 # We simulate the access to the video from www.youtube.com/v/{video_id}
1700 # this can be viewed without login into Youtube
1701 url = proto + '://www.youtube.com/embed/%s' % video_id
1702 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1703 ext = extract_embedded_config(embed_webpage, video_id)
1704 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1705 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1706 if not playable_in_embed:
1707 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1708 playable_in_embed = ''
1709 else:
1710 playable_in_embed = playable_in_embed.group('playableinEmbed')
1711 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1712 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1713 if playable_in_embed == 'false':
1714 '''
1715 # TODO apply this patch when Support for Python 2.6(!) and above drops
1716 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1717 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1718 '''
1719 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1720 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1721 age_gate = False
1722 # Try looking directly into the video webpage
1723 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1724 if ytplayer_config:
1725 args = ytplayer_config.get("args")
1726 if args is not None:
1727 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1728 # Convert to the same format returned by compat_parse_qs
1729 video_info = dict((k, [v]) for k, v in args.items())
1730 add_dash_mpd(video_info)
1731 # Rental video is not rented but preview is available (e.g.
1732 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1733 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1734 if not video_info and args.get('ypc_vid'):
1735 return self.url_result(
1736 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1737 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1738 is_live = True
1739 if not player_response:
1740 player_response = extract_player_response(args.get('player_response'), video_id)
1741 elif not player_response:
1742 player_response = ytplayer_config
1743 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1744 add_dash_mpd_pr(player_response)
1745 else:
1746 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1747 else:
1748 data = compat_urllib_parse_urlencode({
1749 'video_id': video_id,
1750 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1751 'sts': self._search_regex(
1752 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1753 })
1754 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1755 try:
1756 video_info_webpage = self._download_webpage(
1757 video_info_url, video_id,
1758 note='Refetching age-gated info webpage',
1759 errnote='unable to download video info webpage')
1760 except ExtractorError:
1761 video_info_webpage = None
1762 if video_info_webpage:
1763 video_info = compat_parse_qs(video_info_webpage)
1764 pl_response = video_info.get('player_response', [None])[0]
1765 player_response = extract_player_response(pl_response, video_id)
1766 add_dash_mpd(video_info)
1767 view_count = extract_view_count(video_info)
1768 else:
1769 age_gate = False
1770 # Try looking directly into the video webpage
1771 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1772 if ytplayer_config:
1773 args = ytplayer_config.get('args', {})
1774 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1775 # Convert to the same format returned by compat_parse_qs
1776 video_info = dict((k, [v]) for k, v in args.items())
1777 add_dash_mpd(video_info)
1778 # Rental video is not rented but preview is available (e.g.
1779 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1780 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1781 if not video_info and args.get('ypc_vid'):
1782 return self.url_result(
1783 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1784 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1785 is_live = True
1786 if not player_response:
1787 player_response = extract_player_response(args.get('player_response'), video_id)
1788 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1789 add_dash_mpd_pr(player_response)
1790
1791 if not video_info and not player_response:
1792 player_response = extract_player_response(
1793 self._search_regex(
1794 (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
1795 self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
1796 'initial player response', default='{}'),
1797 video_id)
1798
1799 def extract_unavailable_message():
1800 messages = []
1801 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1802 msg = self._html_search_regex(
1803 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1804 video_webpage, 'unavailable %s' % kind, default=None)
1805 if msg:
1806 messages.append(msg)
1807 if messages:
1808 return '\n'.join(messages)
1809
1810 if not video_info and not player_response:
1811 unavailable_message = extract_unavailable_message()
1812 if not unavailable_message:
1813 unavailable_message = 'Unable to extract video data'
1814 raise ExtractorError(
1815 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1816
1817 if not isinstance(video_info, dict):
1818 video_info = {}
1819
1820 playable_in_embed = try_get(
1821 player_response, lambda x: x['playabilityStatus']['playableInEmbed'])
1822
1823 video_details = try_get(
1824 player_response, lambda x: x['videoDetails'], dict) or {}
1825
1826 microformat = try_get(
1827 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1828
1829 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1830 if not video_title:
1831 self._downloader.report_warning('Unable to extract video title')
1832 video_title = '_'
1833
1834 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1835 if video_description:
1836
1837 def replace_url(m):
1838 redir_url = compat_urlparse.urljoin(url, m.group(1))
1839 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1840 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1841 qs = compat_parse_qs(parsed_redir_url.query)
1842 q = qs.get('q')
1843 if q and q[0]:
1844 return q[0]
1845 return redir_url
1846
1847 description_original = video_description = re.sub(r'''(?x)
1848 <a\s+
1849 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1850 (?:title|href)="([^"]+)"\s+
1851 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1852 class="[^"]*"[^>]*>
1853 [^<]+\.{3}\s*
1854 </a>
1855 ''', replace_url, video_description)
1856 video_description = clean_html(video_description)
1857 else:
1858 video_description = video_details.get('shortDescription')
1859 if video_description is None:
1860 video_description = self._html_search_meta('description', video_webpage)
1861
1862 if not smuggled_data.get('force_singlefeed', False):
1863 if not self._downloader.params.get('noplaylist'):
1864 multifeed_metadata_list = try_get(
1865 player_response,
1866 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1867 compat_str) or try_get(
1868 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1869 if multifeed_metadata_list:
1870 entries = []
1871 feed_ids = []
1872 for feed in multifeed_metadata_list.split(','):
1873 # Unquote should take place before split on comma (,) since textual
1874 # fields may contain comma as well (see
1875 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1876 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1877
1878 def feed_entry(name):
1879 return try_get(feed_data, lambda x: x[name][0], compat_str)
1880
1881 feed_id = feed_entry('id')
1882 if not feed_id:
1883 continue
1884 feed_title = feed_entry('title')
1885 title = video_title
1886 if feed_title:
1887 title += ' (%s)' % feed_title
1888 entries.append({
1889 '_type': 'url_transparent',
1890 'ie_key': 'Youtube',
1891 'url': smuggle_url(
1892 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1893 {'force_singlefeed': True}),
1894 'title': title,
1895 })
1896 feed_ids.append(feed_id)
1897 self.to_screen(
1898 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1899 % (', '.join(feed_ids), video_id))
1900 return self.playlist_result(entries, video_id, video_title, video_description)
1901 else:
1902 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1903
1904 if view_count is None:
1905 view_count = extract_view_count(video_info)
1906 if view_count is None and video_details:
1907 view_count = int_or_none(video_details.get('viewCount'))
1908 if view_count is None and microformat:
1909 view_count = int_or_none(microformat.get('viewCount'))
1910
1911 if is_live is None:
1912 is_live = bool_or_none(video_details.get('isLive'))
1913
1914 has_live_chat_replay = False
1915 if not is_live:
1916 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
1917 try:
1918 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1919 has_live_chat_replay = True
1920 except (KeyError, IndexError, TypeError):
1921 pass
1922
1923 # Check for "rental" videos
1924 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1925 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1926
1927 def _extract_filesize(media_url):
1928 return int_or_none(self._search_regex(
1929 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1930
1931 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1932 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1933
1934 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1935 self.report_rtmp_download()
1936 formats = [{
1937 'format_id': '_rtmp',
1938 'protocol': 'rtmp',
1939 'url': video_info['conn'][0],
1940 'player_url': player_url,
1941 }]
1942 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1943 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1944 if 'rtmpe%3Dyes' in encoded_url_map:
1945 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1946 formats = []
1947 formats_spec = {}
1948 fmt_list = video_info.get('fmt_list', [''])[0]
1949 if fmt_list:
1950 for fmt in fmt_list.split(','):
1951 spec = fmt.split('/')
1952 if len(spec) > 1:
1953 width_height = spec[1].split('x')
1954 if len(width_height) == 2:
1955 formats_spec[spec[0]] = {
1956 'resolution': spec[1],
1957 'width': int_or_none(width_height[0]),
1958 'height': int_or_none(width_height[1]),
1959 }
1960 for fmt in streaming_formats:
1961 itag = str_or_none(fmt.get('itag'))
1962 if not itag:
1963 continue
1964 quality = fmt.get('quality')
1965 quality_label = fmt.get('qualityLabel') or quality
1966 formats_spec[itag] = {
1967 'asr': int_or_none(fmt.get('audioSampleRate')),
1968 'filesize': int_or_none(fmt.get('contentLength')),
1969 'format_note': quality_label,
1970 'fps': int_or_none(fmt.get('fps')),
1971 'height': int_or_none(fmt.get('height')),
1972 # bitrate for itag 43 is always 2147483647
1973 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1974 'width': int_or_none(fmt.get('width')),
1975 }
1976
1977 for fmt in streaming_formats:
1978 if fmt.get('drmFamilies') or fmt.get('drm_families'):
1979 continue
1980 url = url_or_none(fmt.get('url'))
1981
1982 if not url:
1983 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
1984 if not cipher:
1985 continue
1986 url_data = compat_parse_qs(cipher)
1987 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1988 if not url:
1989 continue
1990 else:
1991 cipher = None
1992 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1993
1994 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1995 # Unsupported FORMAT_STREAM_TYPE_OTF
1996 if stream_type == 3:
1997 continue
1998
1999 format_id = fmt.get('itag') or url_data['itag'][0]
2000 if not format_id:
2001 continue
2002 format_id = compat_str(format_id)
2003
2004 if cipher:
2005 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2006 ASSETS_RE = (
2007 r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2008 r'"jsUrl"\s*:\s*("[^"]+")',
2009 r'"assets":.+?"js":\s*("[^"]+")')
2010 jsplayer_url_json = self._search_regex(
2011 ASSETS_RE,
2012 embed_webpage if age_gate else video_webpage,
2013 'JS player URL (1)', default=None)
2014 if not jsplayer_url_json and not age_gate:
2015 # We need the embed website after all
2016 if embed_webpage is None:
2017 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2018 embed_webpage = self._download_webpage(
2019 embed_url, video_id, 'Downloading embed webpage')
2020 jsplayer_url_json = self._search_regex(
2021 ASSETS_RE, embed_webpage, 'JS player URL')
2022
2023 player_url = json.loads(jsplayer_url_json)
2024 if player_url is None:
2025 player_url_json = self._search_regex(
2026 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2027 video_webpage, 'age gate player URL')
2028 player_url = json.loads(player_url_json)
2029
2030 if 'sig' in url_data:
2031 url += '&signature=' + url_data['sig'][0]
2032 elif 's' in url_data:
2033 encrypted_sig = url_data['s'][0]
2034
2035 if self._downloader.params.get('verbose'):
2036 if player_url is None:
2037 player_desc = 'unknown'
2038 else:
2039 player_type, player_version = self._extract_player_info(player_url)
2040 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2041 parts_sizes = self._signature_cache_id(encrypted_sig)
2042 self.to_screen('{%s} signature length %s, %s' %
2043 (format_id, parts_sizes, player_desc))
2044
2045 signature = self._decrypt_signature(
2046 encrypted_sig, video_id, player_url, age_gate)
2047 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2048 url += '&%s=%s' % (sp, signature)
2049 if 'ratebypass' not in url:
2050 url += '&ratebypass=yes'
2051
2052 dct = {
2053 'format_id': format_id,
2054 'url': url,
2055 'player_url': player_url,
2056 }
2057 if format_id in self._formats:
2058 dct.update(self._formats[format_id])
2059 if format_id in formats_spec:
2060 dct.update(formats_spec[format_id])
2061
2062 # Some itags are not included in DASH manifest thus corresponding formats will
2063 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2064 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2065 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2066 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2067
2068 if width is None:
2069 width = int_or_none(fmt.get('width'))
2070 if height is None:
2071 height = int_or_none(fmt.get('height'))
2072
2073 filesize = int_or_none(url_data.get(
2074 'clen', [None])[0]) or _extract_filesize(url)
2075
2076 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2077 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2078
2079 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2080 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2081 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2082
2083 more_fields = {
2084 'filesize': filesize,
2085 'tbr': tbr,
2086 'width': width,
2087 'height': height,
2088 'fps': fps,
2089 'format_note': quality_label or quality,
2090 }
2091 for key, value in more_fields.items():
2092 if value:
2093 dct[key] = value
2094 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2095 if type_:
2096 type_split = type_.split(';')
2097 kind_ext = type_split[0].split('/')
2098 if len(kind_ext) == 2:
2099 kind, _ = kind_ext
2100 dct['ext'] = mimetype2ext(type_split[0])
2101 if kind in ('audio', 'video'):
2102 codecs = None
2103 for mobj in re.finditer(
2104 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2105 if mobj.group('key') == 'codecs':
2106 codecs = mobj.group('val')
2107 break
2108 if codecs:
2109 dct.update(parse_codecs(codecs))
2110 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2111 dct['downloader_options'] = {
2112 # Youtube throttles chunks >~10M
2113 'http_chunk_size': 10485760,
2114 }
2115 formats.append(dct)
2116 else:
2117 manifest_url = (
2118 url_or_none(try_get(
2119 player_response,
2120 lambda x: x['streamingData']['hlsManifestUrl'],
2121 compat_str))
2122 or url_or_none(try_get(
2123 video_info, lambda x: x['hlsvp'][0], compat_str)))
2124 if manifest_url:
2125 formats = []
2126 m3u8_formats = self._extract_m3u8_formats(
2127 manifest_url, video_id, 'mp4', fatal=False)
2128 for a_format in m3u8_formats:
2129 itag = self._search_regex(
2130 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2131 if itag:
2132 a_format['format_id'] = itag
2133 if itag in self._formats:
2134 dct = self._formats[itag].copy()
2135 dct.update(a_format)
2136 a_format = dct
2137 a_format['player_url'] = player_url
2138 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2139 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2140 if self._downloader.params.get('youtube_include_hls_manifest', True):
2141 formats.append(a_format)
2142 else:
2143 error_message = extract_unavailable_message()
2144 if not error_message:
2145 reason_list = try_get(
2146 player_response,
2147 lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2148 list) or []
2149 for reason in reason_list:
2150 if not isinstance(reason, dict):
2151 continue
2152 reason_text = try_get(reason, lambda x: x['text'], compat_str)
2153 if reason_text:
2154 if not error_message:
2155 error_message = ''
2156 error_message += reason_text
2157 if error_message:
2158 error_message = clean_html(error_message)
2159 if not error_message:
2160 error_message = clean_html(try_get(
2161 player_response, lambda x: x['playabilityStatus']['reason'],
2162 compat_str))
2163 if not error_message:
2164 error_message = clean_html(
2165 try_get(video_info, lambda x: x['reason'][0], compat_str))
2166 if error_message:
2167 raise ExtractorError(error_message, expected=True)
2168 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2169
2170 # uploader
2171 video_uploader = try_get(
2172 video_info, lambda x: x['author'][0],
2173 compat_str) or str_or_none(video_details.get('author'))
2174 if video_uploader:
2175 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2176 else:
2177 self._downloader.report_warning('unable to extract uploader name')
2178
2179 # uploader_id
2180 video_uploader_id = None
2181 video_uploader_url = None
2182 mobj = re.search(
2183 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2184 video_webpage)
2185 if mobj is not None:
2186 video_uploader_id = mobj.group('uploader_id')
2187 video_uploader_url = mobj.group('uploader_url')
2188 else:
2189 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2190 if owner_profile_url:
2191 video_uploader_id = self._search_regex(
2192 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2193 default=None)
2194 video_uploader_url = owner_profile_url
2195
2196 channel_id = (
2197 str_or_none(video_details.get('channelId'))
2198 or self._html_search_meta(
2199 'channelId', video_webpage, 'channel id', default=None)
2200 or self._search_regex(
2201 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2202 video_webpage, 'channel id', default=None, group='id'))
2203 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2204
2205 thumbnails = []
2206 thumbnails_list = try_get(
2207 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2208 for t in thumbnails_list:
2209 if not isinstance(t, dict):
2210 continue
2211 thumbnail_url = url_or_none(t.get('url'))
2212 if not thumbnail_url:
2213 continue
2214 thumbnails.append({
2215 'url': thumbnail_url,
2216 'width': int_or_none(t.get('width')),
2217 'height': int_or_none(t.get('height')),
2218 })
2219
2220 if not thumbnails:
2221 video_thumbnail = None
2222 # We try first to get a high quality image:
2223 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2224 video_webpage, re.DOTALL)
2225 if m_thumb is not None:
2226 video_thumbnail = m_thumb.group(1)
2227 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2228 if thumbnail_url:
2229 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2230 if video_thumbnail:
2231 thumbnails.append({'url': video_thumbnail})
2232
2233 # upload date
2234 upload_date = self._html_search_meta(
2235 'datePublished', video_webpage, 'upload date', default=None)
2236 if not upload_date:
2237 upload_date = self._search_regex(
2238 [r'(?s)id="eow-date.*?>(.*?)</span>',
2239 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2240 video_webpage, 'upload date', default=None)
2241 if not upload_date:
2242 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2243 upload_date = unified_strdate(upload_date)
2244
2245 video_license = self._html_search_regex(
2246 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2247 video_webpage, 'license', default=None)
2248
2249 m_music = re.search(
2250 r'''(?x)
2251 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2252 <ul[^>]*>\s*
2253 <li>(?P<title>.+?)
2254 by (?P<creator>.+?)
2255 (?:
2256 \(.+?\)|
2257 <a[^>]*
2258 (?:
2259 \bhref=["\']/red[^>]*>| # drop possible
2260 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2261 )
2262 .*?
2263 )?</li
2264 ''',
2265 video_webpage)
2266 if m_music:
2267 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2268 video_creator = clean_html(m_music.group('creator'))
2269 else:
2270 video_alt_title = video_creator = None
2271
2272 def extract_meta(field):
2273 return self._html_search_regex(
2274 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2275 video_webpage, field, default=None)
2276
2277 track = extract_meta('Song')
2278 artist = extract_meta('Artist')
2279 album = extract_meta('Album')
2280
2281 # Youtube Music Auto-generated description
2282 release_date = release_year = None
2283 if video_description:
2284 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2285 if mobj:
2286 if not track:
2287 track = mobj.group('track').strip()
2288 if not artist:
2289 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2290 if not album:
2291 album = mobj.group('album'.strip())
2292 release_year = mobj.group('release_year')
2293 release_date = mobj.group('release_date')
2294 if release_date:
2295 release_date = release_date.replace('-', '')
2296 if not release_year:
2297 release_year = int(release_date[:4])
2298 if release_year:
2299 release_year = int(release_year)
2300
2301 yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
2302 contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2303 for content in contents:
2304 rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
2305 multiple_songs = False
2306 for row in rows:
2307 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2308 multiple_songs = True
2309 break
2310 for row in rows:
2311 mrr = row.get('metadataRowRenderer') or {}
2312 mrr_title = try_get(
2313 mrr, lambda x: x['title']['simpleText'], compat_str)
2314 mrr_contents = try_get(
2315 mrr, lambda x: x['contents'][0], dict) or {}
2316 mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
2317 if not (mrr_title and mrr_contents_text):
2318 continue
2319 if mrr_title == 'License':
2320 video_license = mrr_contents_text
2321 elif not multiple_songs:
2322 if mrr_title == 'Album':
2323 album = mrr_contents_text
2324 elif mrr_title == 'Artist':
2325 artist = mrr_contents_text
2326 elif mrr_title == 'Song':
2327 track = mrr_contents_text
2328
2329 m_episode = re.search(
2330 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2331 video_webpage)
2332 if m_episode:
2333 series = unescapeHTML(m_episode.group('series'))
2334 season_number = int(m_episode.group('season'))
2335 episode_number = int(m_episode.group('episode'))
2336 else:
2337 series = season_number = episode_number = None
2338
2339 m_cat_container = self._search_regex(
2340 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2341 video_webpage, 'categories', default=None)
2342 category = None
2343 if m_cat_container:
2344 category = self._html_search_regex(
2345 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2346 default=None)
2347 if not category:
2348 category = try_get(
2349 microformat, lambda x: x['category'], compat_str)
2350 video_categories = None if category is None else [category]
2351
2352 video_tags = [
2353 unescapeHTML(m.group('content'))
2354 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2355 if not video_tags:
2356 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2357
2358 def _extract_count(count_name):
2359 return str_to_int(self._search_regex(
2360 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2361 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
2362 video_webpage, count_name, default=None))
2363
2364 like_count = _extract_count('like')
2365 dislike_count = _extract_count('dislike')
2366
2367 if view_count is None:
2368 view_count = str_to_int(self._search_regex(
2369 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2370 'view count', default=None))
2371
2372 average_rating = (
2373 float_or_none(video_details.get('averageRating'))
2374 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2375
2376 # subtitles
2377 video_subtitles = self.extract_subtitles(
2378 video_id, video_webpage, has_live_chat_replay)
2379 automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
2380
2381 video_duration = try_get(
2382 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2383 if not video_duration:
2384 video_duration = int_or_none(video_details.get('lengthSeconds'))
2385 if not video_duration:
2386 video_duration = parse_duration(self._html_search_meta(
2387 'duration', video_webpage, 'video duration'))
2388
2389 # Get Subscriber Count of channel
2390 subscriber_count = parse_count(self._search_regex(
2391 r'"text":"([\d\.]+\w?) subscribers"',
2392 video_webpage,
2393 'subscriber count',
2394 default=None
2395 ))
2396
2397 # annotations
2398 video_annotations = None
2399 if self._downloader.params.get('writeannotations', False):
2400 xsrf_token = None
2401 ytcfg = self._extract_ytcfg(video_id, video_webpage)
2402 if ytcfg:
2403 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2404 if not xsrf_token:
2405 xsrf_token = self._search_regex(
2406 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2407 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2408 invideo_url = try_get(
2409 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2410 if xsrf_token and invideo_url:
2411 xsrf_field_name = None
2412 if ytcfg:
2413 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2414 if not xsrf_field_name:
2415 xsrf_field_name = self._search_regex(
2416 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2417 video_webpage, 'xsrf field name',
2418 group='xsrf_field_name', default='session_token')
2419 video_annotations = self._download_webpage(
2420 self._proto_relative_url(invideo_url),
2421 video_id, note='Downloading annotations',
2422 errnote='Unable to download video annotations', fatal=False,
2423 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2424
2425 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2426
2427 # Look for the DASH manifest
2428 if self._downloader.params.get('youtube_include_dash_manifest', True):
2429 dash_mpd_fatal = True
2430 for mpd_url in dash_mpds:
2431 dash_formats = {}
2432 try:
2433 def decrypt_sig(mobj):
2434 s = mobj.group(1)
2435 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2436 return '/signature/%s' % dec_s
2437
2438 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2439
2440 for df in self._extract_mpd_formats(
2441 mpd_url, video_id, fatal=dash_mpd_fatal,
2442 formats_dict=self._formats):
2443 if not df.get('filesize'):
2444 df['filesize'] = _extract_filesize(df['url'])
2445 # Do not overwrite DASH format found in some previous DASH manifest
2446 if df['format_id'] not in dash_formats:
2447 dash_formats[df['format_id']] = df
2448 # Additional DASH manifests may end up in HTTP Error 403 therefore
2449 # allow them to fail without bug report message if we already have
2450 # some DASH manifest succeeded. This is temporary workaround to reduce
2451 # burst of bug reports until we figure out the reason and whether it
2452 # can be fixed at all.
2453 dash_mpd_fatal = False
2454 except (ExtractorError, KeyError) as e:
2455 self.report_warning(
2456 'Skipping DASH manifest: %r' % e, video_id)
2457 if dash_formats:
2458 # Remove the formats we found through non-DASH, they
2459 # contain less info and it can be wrong, because we use
2460 # fixed values (for example the resolution). See
2461 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2462 # example.
2463 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2464 formats.extend(dash_formats.values())
2465
2466 # Check for malformed aspect ratio
2467 stretched_m = re.search(
2468 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2469 video_webpage)
2470 if stretched_m:
2471 w = float(stretched_m.group('w'))
2472 h = float(stretched_m.group('h'))
2473 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2474 # We will only process correct ratios.
2475 if w > 0 and h > 0:
2476 ratio = w / h
2477 for f in formats:
2478 if f.get('vcodec') != 'none':
2479 f['stretched_ratio'] = ratio
2480
2481 if not formats:
2482 if 'reason' in video_info:
2483 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2484 regions_allowed = self._html_search_meta(
2485 'regionsAllowed', video_webpage, default=None)
2486 countries = regions_allowed.split(',') if regions_allowed else None
2487 self.raise_geo_restricted(
2488 msg=video_info['reason'][0], countries=countries)
2489 reason = video_info['reason'][0]
2490 if 'Invalid parameters' in reason:
2491 unavailable_message = extract_unavailable_message()
2492 if unavailable_message:
2493 reason = unavailable_message
2494 raise ExtractorError(
2495 'YouTube said: %s' % reason,
2496 expected=True, video_id=video_id)
2497 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2498 raise ExtractorError('This video is DRM protected.', expected=True)
2499
2500 self._sort_formats(formats)
2501
2502 self.mark_watched(video_id, video_info, player_response)
2503
2504 return {
2505 'id': video_id,
2506 'uploader': video_uploader,
2507 'uploader_id': video_uploader_id,
2508 'uploader_url': video_uploader_url,
2509 'channel_id': channel_id,
2510 'channel_url': channel_url,
2511 'upload_date': upload_date,
2512 'license': video_license,
2513 'creator': video_creator or artist,
2514 'title': video_title,
2515 'alt_title': video_alt_title or track,
2516 'thumbnails': thumbnails,
2517 'description': video_description,
2518 'categories': video_categories,
2519 'tags': video_tags,
2520 'subtitles': video_subtitles,
2521 'automatic_captions': automatic_captions,
2522 'duration': video_duration,
2523 'age_limit': 18 if age_gate else 0,
2524 'annotations': video_annotations,
2525 'chapters': chapters,
2526 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2527 'view_count': view_count,
2528 'like_count': like_count,
2529 'dislike_count': dislike_count,
2530 'average_rating': average_rating,
2531 'formats': formats,
2532 'is_live': is_live,
2533 'start_time': start_time,
2534 'end_time': end_time,
2535 'series': series,
2536 'season_number': season_number,
2537 'episode_number': episode_number,
2538 'track': track,
2539 'artist': artist,
2540 'album': album,
2541 'release_date': release_date,
2542 'release_year': release_year,
2543 'subscriber_count': subscriber_count,
2544 'playable_in_embed': playable_in_embed,
2545 }
2546
2547
2548class YoutubeTabIE(YoutubeBaseInfoExtractor):
2549 IE_DESC = 'YouTube.com tab'
2550 _VALID_URL = r'''(?x)
2551 https?://
2552 (?:\w+\.)?
2553 (?:
2554 youtube(?:kids)?\.com|
2555 invidio\.us
2556 )/
2557 (?:
2558 (?:channel|c|user)/|
2559 (?P<not_channel>
2560 feed/|
2561 (?:playlist|watch)\?.*?\blist=
2562 )|
2563 (?!(?:%s)\b) # Direct URLs
2564 )
2565 (?P<id>[^/?\#&]+)
2566 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
2567 IE_NAME = 'youtube:tab'
2568
2569 _TESTS = [{
2570 # playlists, multipage
2571 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2572 'playlist_mincount': 94,
2573 'info_dict': {
2574 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2575 'title': 'Игорь Клейнер - Playlists',
2576 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2577 },
2578 }, {
2579 # playlists, multipage, different order
2580 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2581 'playlist_mincount': 94,
2582 'info_dict': {
2583 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2584 'title': 'Игорь Клейнер - Playlists',
2585 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2586 },
2587 }, {
2588 # playlists, singlepage
2589 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2590 'playlist_mincount': 4,
2591 'info_dict': {
2592 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2593 'title': 'ThirstForScience - Playlists',
2594 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2595 }
2596 }, {
2597 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2598 'only_matching': True,
2599 }, {
2600 # basic, single video playlist
2601 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2602 'info_dict': {
2603 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2604 'uploader': 'Sergey M.',
2605 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2606 'title': 'youtube-dl public playlist',
2607 },
2608 'playlist_count': 1,
2609 }, {
2610 # empty playlist
2611 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2612 'info_dict': {
2613 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2614 'uploader': 'Sergey M.',
2615 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2616 'title': 'youtube-dl empty playlist',
2617 },
2618 'playlist_count': 0,
2619 }, {
2620 # Home tab
2621 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2622 'info_dict': {
2623 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2624 'title': 'lex will - Home',
2625 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2626 },
2627 'playlist_mincount': 2,
2628 }, {
2629 # Videos tab
2630 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2631 'info_dict': {
2632 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2633 'title': 'lex will - Videos',
2634 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2635 },
2636 'playlist_mincount': 975,
2637 }, {
2638 # Videos tab, sorted by popular
2639 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2640 'info_dict': {
2641 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2642 'title': 'lex will - Videos',
2643 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2644 },
2645 'playlist_mincount': 199,
2646 }, {
2647 # Playlists tab
2648 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2649 'info_dict': {
2650 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2651 'title': 'lex will - Playlists',
2652 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2653 },
2654 'playlist_mincount': 17,
2655 }, {
2656 # Community tab
2657 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2658 'info_dict': {
2659 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2660 'title': 'lex will - Community',
2661 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2662 },
2663 'playlist_mincount': 18,
2664 }, {
2665 # Channels tab
2666 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2667 'info_dict': {
2668 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2669 'title': 'lex will - Channels',
2670 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2671 },
2672 'playlist_mincount': 138,
2673 }, {
2674 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2675 'only_matching': True,
2676 }, {
2677 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2678 'only_matching': True,
2679 }, {
2680 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2681 'only_matching': True,
2682 }, {
2683 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2684 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2685 'info_dict': {
2686 'title': '29C3: Not my department',
2687 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2688 'uploader': 'Christiaan008',
2689 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2690 },
2691 'playlist_count': 96,
2692 }, {
2693 'note': 'Large playlist',
2694 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2695 'info_dict': {
2696 'title': 'Uploads from Cauchemar',
2697 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2698 'uploader': 'Cauchemar',
2699 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2700 },
2701 'playlist_mincount': 1123,
2702 }, {
2703 # even larger playlist, 8832 videos
2704 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2705 'only_matching': True,
2706 }, {
2707 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2708 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2709 'info_dict': {
2710 'title': 'Uploads from Interstellar Movie',
2711 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2712 'uploader': 'Interstellar Movie',
2713 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2714 },
2715 'playlist_mincount': 21,
2716 }, {
2717 # https://github.com/ytdl-org/youtube-dl/issues/21844
2718 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2719 'info_dict': {
2720 'title': 'Data Analysis with Dr Mike Pound',
2721 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2722 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2723 'uploader': 'Computerphile',
2724 },
2725 'playlist_mincount': 11,
2726 }, {
2727 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2728 'only_matching': True,
2729 }, {
2730 # Playlist URL that does not actually serve a playlist
2731 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2732 'info_dict': {
2733 'id': 'FqZTN594JQw',
2734 'ext': 'webm',
2735 'title': "Smiley's People 01 detective, Adventure Series, Action",
2736 'uploader': 'STREEM',
2737 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2738 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2739 'upload_date': '20150526',
2740 'license': 'Standard YouTube License',
2741 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2742 'categories': ['People & Blogs'],
2743 'tags': list,
2744 'view_count': int,
2745 'like_count': int,
2746 'dislike_count': int,
2747 },
2748 'params': {
2749 'skip_download': True,
2750 },
2751 'skip': 'This video is not available.',
2752 'add_ie': [YoutubeIE.ie_key()],
2753 }, {
2754 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2755 'only_matching': True,
2756 }, {
2757 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2758 'only_matching': True,
2759 }, {
2760 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2761 'info_dict': {
2762 'id': '9Auq9mYxFEE',
2763 'ext': 'mp4',
2764 'title': 'Watch Sky News live',
2765 'uploader': 'Sky News',
2766 'uploader_id': 'skynews',
2767 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2768 'upload_date': '20191102',
2769 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2770 'categories': ['News & Politics'],
2771 'tags': list,
2772 'like_count': int,
2773 'dislike_count': int,
2774 },
2775 'params': {
2776 'skip_download': True,
2777 },
2778 }, {
2779 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2780 'info_dict': {
2781 'id': 'a48o2S1cPoo',
2782 'ext': 'mp4',
2783 'title': 'The Young Turks - Live Main Show',
2784 'uploader': 'The Young Turks',
2785 'uploader_id': 'TheYoungTurks',
2786 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2787 'upload_date': '20150715',
2788 'license': 'Standard YouTube License',
2789 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2790 'categories': ['News & Politics'],
2791 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2792 'like_count': int,
2793 'dislike_count': int,
2794 },
2795 'params': {
2796 'skip_download': True,
2797 },
2798 'only_matching': True,
2799 }, {
2800 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2801 'only_matching': True,
2802 }, {
2803 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2804 'only_matching': True,
2805 }, {
2806 'url': 'https://www.youtube.com/feed/trending',
2807 'only_matching': True,
2808 }, {
2809 # needs auth
2810 'url': 'https://www.youtube.com/feed/library',
2811 'only_matching': True,
2812 }, {
2813 # needs auth
2814 'url': 'https://www.youtube.com/feed/history',
2815 'only_matching': True,
2816 }, {
2817 # needs auth
2818 'url': 'https://www.youtube.com/feed/subscriptions',
2819 'only_matching': True,
2820 }, {
2821 # needs auth
2822 'url': 'https://www.youtube.com/feed/watch_later',
2823 'only_matching': True,
2824 }, {
2825 # no longer available?
2826 'url': 'https://www.youtube.com/feed/recommended',
2827 'only_matching': True,
2828 }, {
2829 # inline playlist with not always working continuations
2830 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2831 'only_matching': True,
2832 }, {
2833 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2834 'only_matching': True,
2835 }, {
2836 'url': 'https://www.youtube.com/course',
2837 'only_matching': True,
2838 }, {
2839 'url': 'https://www.youtube.com/zsecurity',
2840 'only_matching': True,
2841 }, {
2842 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2843 'only_matching': True,
2844 }, {
2845 'url': 'https://www.youtube.com/TheYoungTurks/live',
2846 'only_matching': True,
2847 }]
2848
2849 @classmethod
2850 def suitable(cls, url):
2851 return False if YoutubeIE.suitable(url) else super(
2852 YoutubeTabIE, cls).suitable(url)
2853
2854 def _extract_channel_id(self, webpage):
2855 channel_id = self._html_search_meta(
2856 'channelId', webpage, 'channel id', default=None)
2857 if channel_id:
2858 return channel_id
2859 channel_url = self._html_search_meta(
2860 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2861 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2862 'twitter:app:url:googleplay'), webpage, 'channel url')
2863 return self._search_regex(
2864 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2865 channel_url, 'channel id')
2866
2867 @staticmethod
2868 def _extract_grid_item_renderer(item):
2869 for item_kind in ('Playlist', 'Video', 'Channel'):
2870 renderer = item.get('grid%sRenderer' % item_kind)
2871 if renderer:
2872 return renderer
2873
2874 def _extract_video(self, renderer):
2875 video_id = renderer.get('videoId')
2876 title = try_get(
2877 renderer,
2878 (lambda x: x['title']['runs'][0]['text'],
2879 lambda x: x['title']['simpleText']), compat_str)
2880 description = try_get(
2881 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
2882 compat_str)
2883 duration = parse_duration(try_get(
2884 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
2885 view_count_text = try_get(
2886 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
2887 view_count = str_to_int(self._search_regex(
2888 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
2889 'view count', default=None))
2890 uploader = try_get(
2891 renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
2892 return {
2893 '_type': 'url_transparent',
2894 'ie_key': YoutubeIE.ie_key(),
2895 'id': video_id,
2896 'url': video_id,
2897 'title': title,
2898 'description': description,
2899 'duration': duration,
2900 'view_count': view_count,
2901 'uploader': uploader,
2902 }
2903
2904 def _grid_entries(self, grid_renderer):
2905 for item in grid_renderer['items']:
2906 if not isinstance(item, dict):
2907 continue
2908 renderer = self._extract_grid_item_renderer(item)
2909 if not isinstance(renderer, dict):
2910 continue
2911 title = try_get(
2912 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2913 # playlist
2914 playlist_id = renderer.get('playlistId')
2915 if playlist_id:
2916 yield self.url_result(
2917 'https://www.youtube.com/playlist?list=%s' % playlist_id,
2918 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2919 video_title=title)
2920 # video
2921 video_id = renderer.get('videoId')
2922 if video_id:
2923 yield self._extract_video(renderer)
2924 # channel
2925 channel_id = renderer.get('channelId')
2926 if channel_id:
2927 title = try_get(
2928 renderer, lambda x: x['title']['simpleText'], compat_str)
2929 yield self.url_result(
2930 'https://www.youtube.com/channel/%s' % channel_id,
2931 ie=YoutubeTabIE.ie_key(), video_title=title)
2932
2933 def _shelf_entries_from_content(self, shelf_renderer):
2934 content = shelf_renderer.get('content')
2935 if not isinstance(content, dict):
2936 return
2937 renderer = content.get('gridRenderer')
2938 if renderer:
2939 # TODO: add support for nested playlists so each shelf is processed
2940 # as separate playlist
2941 # TODO: this includes only first N items
2942 for entry in self._grid_entries(renderer):
2943 yield entry
2944 renderer = content.get('horizontalListRenderer')
2945 if renderer:
2946 # TODO
2947 pass
2948
    def _shelf_entries(self, shelf_renderer, skip_channels=False):
        """Yield entries for a shelfRenderer.

        Prefers delegating to the shelf's own URL; falls back to extracting
        the shelf content inline when no URL is present. With *skip_channels*
        set, shelves that link to other channels are ignored.
        """
        ep = try_get(
            shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        shelf_url = urljoin('https://www.youtube.com', ep)
        if shelf_url:
            # Skipping links to other channels; note that checking for
            # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
            # will not work
            if skip_channels and '/channels?' in shelf_url:
                return
            title = try_get(
                shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
            yield self.url_result(shelf_url, video_title=title)
        # Shelf may not contain shelf URL, fallback to extraction from content
        for entry in self._shelf_entries_from_content(shelf_renderer):
            yield entry
2966
2967 def _playlist_entries(self, video_list_renderer):
2968 for content in video_list_renderer['contents']:
2969 if not isinstance(content, dict):
2970 continue
2971 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2972 if not isinstance(renderer, dict):
2973 continue
2974 video_id = renderer.get('videoId')
2975 if not video_id:
2976 continue
2977 yield self._extract_video(renderer)
2978
2979 r""" # Not needed in the new implementation
2980 def _itemSection_entries(self, item_sect_renderer):
2981 for content in item_sect_renderer['contents']:
2982 if not isinstance(content, dict):
2983 continue
2984 renderer = content.get('videoRenderer', {})
2985 if not isinstance(renderer, dict):
2986 continue
2987 video_id = renderer.get('videoId')
2988 if not video_id:
2989 continue
2990 yield self._extract_video(renderer)
2991 """
2992
2993 def _rich_entries(self, rich_grid_renderer):
2994 renderer = try_get(
2995 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
2996 video_id = renderer.get('videoId')
2997 if not video_id:
2998 return
2999 yield self._extract_video(renderer)
3000
3001 def _video_entry(self, video_renderer):
3002 video_id = video_renderer.get('videoId')
3003 if video_id:
3004 return self._extract_video(video_renderer)
3005
3006 def _post_thread_entries(self, post_thread_renderer):
3007 post_renderer = try_get(
3008 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3009 if not post_renderer:
3010 return
3011 # video attachment
3012 video_renderer = try_get(
3013 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
3014 video_id = None
3015 if video_renderer:
3016 entry = self._video_entry(video_renderer)
3017 if entry:
3018 yield entry
3019 # inline video links
3020 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3021 for run in runs:
3022 if not isinstance(run, dict):
3023 continue
3024 ep_url = try_get(
3025 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3026 if not ep_url:
3027 continue
3028 if not YoutubeIE.suitable(ep_url):
3029 continue
3030 ep_video_id = YoutubeIE._match_id(ep_url)
3031 if video_id == ep_video_id:
3032 continue
3033 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
3034
3035 def _post_thread_continuation_entries(self, post_thread_continuation):
3036 contents = post_thread_continuation.get('contents')
3037 if not isinstance(contents, list):
3038 return
3039 for content in contents:
3040 renderer = content.get('backstagePostThreadRenderer')
3041 if not isinstance(renderer, dict):
3042 continue
3043 for entry in self._post_thread_entries(renderer):
3044 yield entry
3045
3046 @staticmethod
3047 def _build_continuation_query(continuation, ctp=None):
3048 query = {
3049 'ctoken': continuation,
3050 'continuation': continuation,
3051 }
3052 if ctp:
3053 query['itct'] = ctp
3054 return query
3055
3056 @staticmethod
3057 def _extract_next_continuation_data(renderer):
3058 next_continuation = try_get(
3059 renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
3060 if not next_continuation:
3061 return
3062 continuation = next_continuation.get('continuation')
3063 if not continuation:
3064 return
3065 ctp = next_continuation.get('clickTrackingParams')
3066 return YoutubeTabIE._build_continuation_query(continuation, ctp)
3067
3068 @classmethod
3069 def _extract_continuation(cls, renderer):
3070 next_continuation = cls._extract_next_continuation_data(renderer)
3071 if next_continuation:
3072 return next_continuation
3073 contents = renderer.get('contents')
3074 if not isinstance(contents, list):
3075 return
3076 for content in contents:
3077 if not isinstance(content, dict):
3078 continue
3079 continuation_ep = try_get(
3080 content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
3081 dict)
3082 if not continuation_ep:
3083 continue
3084 continuation = try_get(
3085 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
3086 if not continuation:
3087 continue
3088 ctp = continuation_ep.get('clickTrackingParams')
3089 return YoutubeTabIE._build_continuation_query(continuation, ctp)
3090
    def _entries(self, tab, identity_token):
        """Yield all entries of the selected tab, following continuations.

        *identity_token*, when present, is sent as x-youtube-identity-token so
        authenticated feeds (library, history, ...) can be paged.
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Not an item section: may still be a rich item (e.g. home feed)
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue
                    renderer = isr_content.get('playlistVideoListRenderer')
                    if renderer:
                        for entry in self._playlist_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('gridRenderer')
                    if renderer:
                        for entry in self._grid_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('shelfRenderer')
                    if renderer:
                        # Only follow channel shelves when on the Channels tab
                        is_channels_tab = tab.get('title') == 'Channels'
                        for entry in self._shelf_entries(renderer, not is_channels_tab):
                            yield entry
                        continue
                    renderer = isr_content.get('backstagePostThreadRenderer')
                    if renderer:
                        for entry in self._post_thread_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('videoRenderer')
                    if renderer:
                        entry = self._video_entry(renderer)
                        if entry:
                            yield entry

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal, so use a one-element list
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]

        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': '2.20201112.04.01',
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        # Page through continuations until none is returned.
        for page_num in itertools.count(1):
            if not continuation:
                break
            count = 0
            retries = 3
            while count <= retries:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    browse = self._download_json(
                        'https://www.youtube.com/browse_ajax', None,
                        'Downloading page %d%s'
                        % (page_num, ' (retry #%d)' % count if count else ''),
                        headers=headers, query=continuation)
                    break
                except ExtractorError as e:
                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                        count += 1
                        if count <= retries:
                            continue
                    raise
            if not browse:
                break
            response = try_get(browse, lambda x: x[1]['response'], dict)
            if not response:
                break

            # Old-style continuation payloads
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
                if continuation_renderer:
                    continuation_list = [None]
                    for entry in extract_entries(continuation_renderer):
                        yield entry
                    continuation = continuation_list[0]
                    continue

            # New-style continuation payloads
            continuation_items = try_get(
                response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                if not isinstance(continuation_item, dict):
                    continue
                renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
                if renderer:
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue
            break
3234
3235 @staticmethod
3236 def _extract_selected_tab(tabs):
3237 for tab in tabs:
3238 if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3239 return tab['tabRenderer']
3240 else:
3241 raise ExtractorError('Unable to find selected tab')
3242
3243 @staticmethod
3244 def _extract_uploader(data):
3245 uploader = {}
3246 sidebar_renderer = try_get(
3247 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3248 if sidebar_renderer:
3249 for item in sidebar_renderer:
3250 if not isinstance(item, dict):
3251 continue
3252 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3253 if not isinstance(renderer, dict):
3254 continue
3255 owner = try_get(
3256 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3257 if owner:
3258 uploader['uploader'] = owner.get('text')
3259 uploader['uploader_id'] = try_get(
3260 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3261 uploader['uploader_url'] = urljoin(
3262 'https://www.youtube.com/',
3263 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3264 return uploader
3265
    def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
        """Build a playlist result for the selected tab of a channel/playlist page.

        Title/id/description come from channelMetadataRenderer when this is a
        channel page, overridden by playlistMetadataRenderer for playlist pages,
        with *item_id*-based fallbacks when neither is present.
        """
        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        playlist_id = title = description = None
        if renderer:
            # Channel page: compose "<channel title> - <tab title>"
            channel_title = renderer.get('title') or item_id
            tab_title = selected_tab.get('title')
            title = channel_title or item_id
            if tab_title:
                title += ' - %s' % tab_title
            description = renderer.get('description')
            playlist_id = renderer.get('externalId')
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
        if renderer:
            # Playlist page: its own title wins; it carries no description
            title = renderer.get('title')
            description = None
            playlist_id = item_id
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            title = "Youtube " + playlist_id.title()
        playlist = self.playlist_result(
            self._entries(selected_tab, identity_token),
            playlist_id=playlist_id, playlist_title=title,
            playlist_description=description)
        playlist.update(self._extract_uploader(data))
        return playlist
3295
3296 def _extract_from_playlist(self, item_id, url, data, playlist):
3297 title = playlist.get('title') or try_get(
3298 data, lambda x: x['titleText']['simpleText'], compat_str)
3299 playlist_id = playlist.get('playlistId') or item_id
3300 # Inline playlist rendition continuation does not always work
3301 # at Youtube side, so delegating regular tab-based playlist URL
3302 # processing whenever possible.
3303 playlist_url = urljoin(url, try_get(
3304 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3305 compat_str))
3306 if playlist_url and playlist_url != url:
3307 return self.url_result(
3308 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3309 video_title=title)
3310 return self.playlist_result(
3311 self._playlist_entries(playlist), playlist_id=playlist_id,
3312 playlist_title=title)
3313
3314 @staticmethod
3315 def _extract_alerts(data):
3316 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
3317 if not isinstance(alert_dict, dict):
3318 continue
3319 for renderer in alert_dict:
3320 alert = alert_dict[renderer]
3321 alert_type = alert.get('type')
3322 if not alert_type:
3323 continue
3324 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
3325 if message:
3326 yield alert_type, message
3327 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
3328 message = try_get(run, lambda x: x['text'], compat_str)
3329 if message:
3330 yield alert_type, message
3331
3332 def _extract_identity_token(self, webpage, item_id):
3333 ytcfg = self._extract_ytcfg(item_id, webpage)
3334 if ytcfg:
3335 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
3336 if token:
3337 return token
3338 return self._search_regex(
3339 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
3340 'identity token', default=None)
3341
    def _real_extract(self, url):
        """Dispatch a tab/channel/playlist URL to the right extraction path.

        Order of attempts: channel "home page" rewrite, watch-URL fallback to
        playlist, --no-playlist handling, then tabs -> playlist -> single
        video from the downloaded initial data.
        """
        item_id = self._match_id(url)
        # Normalize the host so subsequent requests hit www.youtube.com.
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # 'pre'/'post' split the URL at the end of _VALID_URL; 'not_channel'
        # is a named group defined in _VALID_URL (outside this view) —
        # presumably non-None for watch/feed-style URLs. TODO confirm.
        is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
        if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
            self._downloader.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            # Rewrite the bare channel URL to its /videos tab.
            url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

        # Handle both video/playlist URLs
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        # A watch URL without a video id can still be salvaged if it names
        # a playlist; otherwise it is unrecognizable.
        if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
            if playlist_id:
                self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
                url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
                # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
            else:
                raise ExtractorError('Unable to recognize tab page')
        if video_id and playlist_id:
            # --no-playlist: short-circuit to the single video.
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage = self._download_webpage(url, item_id)
        identity_token = self._extract_identity_token(webpage, item_id)
        data = self._extract_yt_initial_data(item_id, webpage)
        # Surface any alerts (e.g. unavailable/private notices) as warnings.
        for alert_type, alert_message in self._extract_alerts(data):
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized.
        # First check for the current video then try the v attribute of URL query.
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
3393
3394
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to YoutubeTabIE for any URL it already handles.
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE via a canonical playlist URL, keeping
        the original query string (or synthesizing one from the bare id)."""
        playlist_id = self._match_id(url)
        query = compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query) or {'list': playlist_id}
        return self.url_result(
            update_url_query('https://www.youtube.com/playlist', query),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3469
3470
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite a youtu.be short link carrying a list= parameter to the
        equivalent full watch URL and hand it to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3509
3510
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Map the ytuser:<name> shorthand onto the canonical /user/ URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/' + user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
3524
3525
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the :ytfav shorthand to the liked-videos (LL) playlist."""
        liked_playlist = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist, ie=YoutubeTabIE.ie_key())
3543
3544
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* url_transparent results for *query* by paging
        through the youtubei/v1/search API with continuation tokens."""
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            isr_contents = []
            continuation_token = None
            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            for index, isr in enumerate(slr_contents):
                if not isr_contents:
                    # try_get may return None when this entry is not an
                    # itemSectionRenderer; coerce to [] so the scan below
                    # does not raise TypeError on a missed lookup.
                    isr_contents = try_get(
                        slr_contents,
                        (lambda x: x[index]['itemSectionRenderer']['contents']),
                        list) or []
                    for content in isr_contents:
                        if content.get('videoRenderer') is not None:
                            break
                    else:
                        # Section held only promoted/non-video items.
                        isr_contents = []

                if continuation_token is None:
                    continuation_token = try_get(
                        slr_contents,
                        lambda x: x[index]['continuationItemRenderer']['continuationEndpoint']['continuationCommand'][
                            'token'],
                        compat_str)
                if continuation_token is not None and isr_contents:
                    break

            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                view_count = str_to_int(self._search_regex(
                    r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                if total == n:
                    return
            if not continuation_token:
                break
            # Request the next page on the following iteration.
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3652
3653
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same extraction as YoutubeSearchIE, only with a search filter applied.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Protobuf-encoded search parameter selecting sort-by-upload-date.
    _SEARCH_PARAMS = 'CAI%3D'
3659
3660
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # This variant matches a real results URL, not a search-key prefix.
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run a search from a /results URL, honoring its sp= filter."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query = (params.get('search_query') or params.get('q'))[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3686
3687
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    # _MAX_PAGES = 5
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the name from the subclass's feed, e.g. youtube:history.
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Delegate extraction of the feed page to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
3708
3709
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve :ytwatchlater to the WL playlist handled by YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3722
3723
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the bare youtube.com front page as well as :ytrec(ommended).
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    # Fetched from /feed/recommended by YoutubeFeedsInfoExtractor.
    _FEED_NAME = 'recommended'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
3738
3739
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    # Fetched from /feed/subscriptions by YoutubeFeedsInfoExtractor.
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
3751
3752
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    # Fetched from /feed/history by YoutubeFeedsInfoExtractor.
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
3761
3762
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs whose v= parameter was cut off (usually
    by an unquoted '&' in the shell) and fail with a helpful message."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches watch URLs that carry only secondary parameters (feature=,
    # annotation_id=, hl=, t=, x-yt-cl=) but no video id, and bare
    # attribution_link URLs.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: a matching URL cannot contain a video id.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .',
            expected=True)
3810
3811
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fail loudly on a watch URL whose video id is shorter than the
        11 characters the _VALID_URL pattern requires of a full id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)
3827
3828
# Do Youtube show urls even exist anymore? I couldn't find any
# NOTE: the extractor below is deliberately kept as a raw string (dead code)
# so it can be revived if /show/ URLs reappear; it is never executed.
r'''
class YoutubeShowIE(YoutubeTabIE):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
'''