jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import functools
	6	import hashlib
	7	import itertools
	8	import json
	9	import math
	10	import os.path
	11	import random
	12	import re
	13	import sys
	14	import threading
	15	import time
	16	import traceback
	17
	18	from .common import InfoExtractor, SearchInfoExtractor
	19	from ..compat import (
	20	compat_chr,
	21	compat_HTTPError,
	22	compat_parse_qs,
	23	compat_str,
	24	compat_urllib_parse_unquote_plus,
	25	compat_urllib_parse_urlencode,
	26	compat_urllib_parse_urlparse,
	27	compat_urlparse,
	28	)
	29	from ..jsinterp import JSInterpreter
	30	from ..utils import (
	31	NO_DEFAULT,
	32	ExtractorError,
	33	bug_reports_message,
	34	clean_html,
	35	datetime_from_str,
	36	dict_get,
	37	error_to_compat_str,
	38	float_or_none,
	39	format_field,
	40	get_first,
	41	int_or_none,
	42	is_html,
	43	join_nonempty,
	44	js_to_json,
	45	mimetype2ext,
	46	network_exceptions,
	47	orderedSet,
	48	parse_codecs,
	49	parse_count,
	50	parse_duration,
	51	parse_iso8601,
	52	parse_qs,
	53	qualities,
	54	remove_end,
	55	remove_start,
	56	smuggle_url,
	57	str_or_none,
	58	str_to_int,
	59	strftime_or_none,
	60	traverse_obj,
	61	try_get,
	62	unescapeHTML,
	63	unified_strdate,
	64	unified_timestamp,
	65	unsmuggle_url,
	66	update_url_query,
	67	url_or_none,
	68	urljoin,
	69	variadic,
	70	)
	71
	72	# any clients starting with _ cannot be explicitly requested by the user
	73	INNERTUBE_CLIENTS = {
	74	'web': {
	75	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	76	'INNERTUBE_CONTEXT': {
	77	'client': {
	78	'clientName': 'WEB',
	79	'clientVersion': '2.20211221.00.00',
	80	}
	81	},
	82	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	83	},
	84	'web_embedded': {
	85	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	86	'INNERTUBE_CONTEXT': {
	87	'client': {
	88	'clientName': 'WEB_EMBEDDED_PLAYER',
	89	'clientVersion': '1.20211215.00.01',
	90	},
	91	},
	92	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	93	},
	94	'web_music': {
	95	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	96	'INNERTUBE_HOST': 'music.youtube.com',
	97	'INNERTUBE_CONTEXT': {
	98	'client': {
	99	'clientName': 'WEB_REMIX',
	100	'clientVersion': '1.20211213.00.00',
	101	}
	102	},
	103	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	104	},
	105	'web_creator': {
	106	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	107	'INNERTUBE_CONTEXT': {
	108	'client': {
	109	'clientName': 'WEB_CREATOR',
	110	'clientVersion': '1.20211220.02.00',
	111	}
	112	},
	113	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	114	},
	115	'android': {
	116	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	117	'INNERTUBE_CONTEXT': {
	118	'client': {
	119	'clientName': 'ANDROID',
	120	'clientVersion': '16.49',
	121	}
	122	},
	123	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	124	'REQUIRE_JS_PLAYER': False
	125	},
	126	'android_embedded': {
	127	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	128	'INNERTUBE_CONTEXT': {
	129	'client': {
	130	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	131	'clientVersion': '16.49',
	132	},
	133	},
	134	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	135	'REQUIRE_JS_PLAYER': False
	136	},
	137	'android_music': {
	138	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	139	'INNERTUBE_CONTEXT': {
	140	'client': {
	141	'clientName': 'ANDROID_MUSIC',
	142	'clientVersion': '4.57',
	143	}
	144	},
	145	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	146	'REQUIRE_JS_PLAYER': False
	147	},
	148	'android_creator': {
	149	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	150	'INNERTUBE_CONTEXT': {
	151	'client': {
	152	'clientName': 'ANDROID_CREATOR',
	153	'clientVersion': '21.47',
	154	},
	155	},
	156	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	157	'REQUIRE_JS_PLAYER': False
	158	},
	159	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	160	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	161	'ios': {
	162	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	163	'INNERTUBE_CONTEXT': {
	164	'client': {
	165	'clientName': 'IOS',
	166	'clientVersion': '16.46',
	167	'deviceModel': 'iPhone14,3',
	168	}
	169	},
	170	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	171	'REQUIRE_JS_PLAYER': False
	172	},
	173	'ios_embedded': {
	174	'INNERTUBE_CONTEXT': {
	175	'client': {
	176	'clientName': 'IOS_MESSAGES_EXTENSION',
	177	'clientVersion': '16.46',
	178	'deviceModel': 'iPhone14,3',
	179	},
	180	},
	181	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	182	'REQUIRE_JS_PLAYER': False
	183	},
	184	'ios_music': {
	185	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	186	'INNERTUBE_CONTEXT': {
	187	'client': {
	188	'clientName': 'IOS_MUSIC',
	189	'clientVersion': '4.57',
	190	},
	191	},
	192	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	193	'REQUIRE_JS_PLAYER': False
	194	},
	195	'ios_creator': {
	196	'INNERTUBE_CONTEXT': {
	197	'client': {
	198	'clientName': 'IOS_CREATOR',
	199	'clientVersion': '21.47',
	200	},
	201	},
	202	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	203	'REQUIRE_JS_PLAYER': False
	204	},
	205	# mweb has 'ultralow' formats
	206	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	207	'mweb': {
	208	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	209	'INNERTUBE_CONTEXT': {
	210	'client': {
	211	'clientName': 'MWEB',
	212	'clientVersion': '2.20211221.01.00',
	213	}
	214	},
	215	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	216	},
	217	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	218	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	219	'tv_embedded': {
	220	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	221	'INNERTUBE_CONTEXT': {
	222	'client': {
	223	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	224	'clientVersion': '2.0',
	225	},
	226	},
	227	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	228	},
	229	}
	230
	231
	232	def _split_innertube_client(client_name):
	233	variant, *base = client_name.rsplit('.', 1)
	234	if base:
	235	return variant, base[0], variant
	236	base, *variant = client_name.split('_', 1)
	237	return client_name, base, variant[0] if variant else None
	238
	239
	240	def build_innertube_clients():
	241	THIRD_PARTY = {
	242	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	243	}
	244	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	245	priority = qualities(BASE_CLIENTS[::-1])
	246
	247	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	248	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	249	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	250	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	251	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	252
	253	_, base_client, variant = _split_innertube_client(client)
	254	ytcfg['priority'] = 10 * priority(base_client)
	255
	256	if not variant:
	257	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	258	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	259	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	260	embedscreen['priority'] -= 3
	261	elif variant == 'embedded':
	262	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	263	ytcfg['priority'] -= 2
	264	else:
	265	ytcfg['priority'] -= 3
	266
	267
	268	build_innertube_clients()
	269
	270
	271	class YoutubeBaseInfoExtractor(InfoExtractor):
	272	"""Provide base functions for Youtube extractors"""
	273
	274	_RESERVED_NAMES = (
	275	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	276	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	277	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	278	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	279
	280	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	281
	282	# _NETRC_MACHINE = 'youtube'
	283
	284	# If True it will raise an error if no login info is provided
	285	_LOGIN_REQUIRED = False
	286
	287	_INVIDIOUS_SITES = (
	288	# invidious-redirect websites
	289	r'(?:www\.)?redirect\.invidious\.io',
	290	r'(?:(?:www\|dev)\.)?invidio\.us',
	291	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	292	r'(?:www\.)?invidious\.pussthecat\.org',
	293	r'(?:www\.)?invidious\.zee\.li',
	294	r'(?:www\.)?invidious\.ethibox\.fr',
	295	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	296	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	297	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	298	# youtube-dl invidious instances list
	299	r'(?:(?:www\|no)\.)?invidiou\.sh',
	300	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	301	r'(?:www\.)?invidious\.kabi\.tk',
	302	r'(?:www\.)?invidious\.mastodon\.host',
	303	r'(?:www\.)?invidious\.zapashcanon\.fr',
	304	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	305	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	306	r'(?:www\.)?invidious\.himiko\.cloud',
	307	r'(?:www\.)?invidious\.reallyancient\.tech',
	308	r'(?:www\.)?invidious\.tube',
	309	r'(?:www\.)?invidiou\.site',
	310	r'(?:www\.)?invidious\.site',
	311	r'(?:www\.)?invidious\.xyz',
	312	r'(?:www\.)?invidious\.nixnet\.xyz',
	313	r'(?:www\.)?invidious\.048596\.xyz',
	314	r'(?:www\.)?invidious\.drycat\.fr',
	315	r'(?:www\.)?inv\.skyn3t\.in',
	316	r'(?:www\.)?tube\.poal\.co',
	317	r'(?:www\.)?tube\.connect\.cafe',
	318	r'(?:www\.)?vid\.wxzm\.sx',
	319	r'(?:www\.)?vid\.mint\.lgbt',
	320	r'(?:www\.)?vid\.puffyan\.us',
	321	r'(?:www\.)?yewtu\.be',
	322	r'(?:www\.)?yt\.elukerio\.org',
	323	r'(?:www\.)?yt\.lelux\.fi',
	324	r'(?:www\.)?invidious\.ggc-project\.de',
	325	r'(?:www\.)?yt\.maisputain\.ovh',
	326	r'(?:www\.)?ytprivate\.com',
	327	r'(?:www\.)?invidious\.13ad\.de',
	328	r'(?:www\.)?invidious\.toot\.koeln',
	329	r'(?:www\.)?invidious\.fdn\.fr',
	330	r'(?:www\.)?watch\.nettohikari\.com',
	331	r'(?:www\.)?invidious\.namazso\.eu',
	332	r'(?:www\.)?invidious\.silkky\.cloud',
	333	r'(?:www\.)?invidious\.exonip\.de',
	334	r'(?:www\.)?invidious\.riverside\.rocks',
	335	r'(?:www\.)?invidious\.blamefran\.net',
	336	r'(?:www\.)?invidious\.moomoo\.de',
	337	r'(?:www\.)?ytb\.trom\.tf',
	338	r'(?:www\.)?yt\.cyberhost\.uk',
	339	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	340	r'(?:www\.)?qklhadlycap4cnod\.onion',
	341	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	342	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	343	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	344	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	345	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	346	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	347	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	348	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	349	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	350	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	351	)
	352
	353	def _initialize_consent(self):
	354	cookies = self._get_cookies('https://www.youtube.com/')
	355	if cookies.get('__Secure-3PSID'):
	356	return
	357	consent_id = None
	358	consent = cookies.get('CONSENT')
	359	if consent:
	360	if 'YES' in consent.value:
	361	return
	362	consent_id = self._search_regex(
	363	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	364	if not consent_id:
	365	consent_id = random.randint(100, 999)
	366	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	367
	368	def _initialize_pref(self):
	369	cookies = self._get_cookies('https://www.youtube.com/')
	370	pref_cookie = cookies.get('PREF')
	371	pref = {}
	372	if pref_cookie:
	373	try:
	374	pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
	375	except ValueError:
	376	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	377	pref.update({'hl': 'en', 'tz': 'UTC'})
	378	self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
	379
	380	def _real_initialize(self):
	381	self._initialize_pref()
	382	self._initialize_consent()
	383	self._check_login_required()
	384
	385	def _check_login_required(self):
	386	if (self._LOGIN_REQUIRED
	387	and self.get_param('cookiefile') is None
	388	and self.get_param('cookiesfrombrowser') is None):
	389	self.raise_login_required('Login details are needed to download this content', method='cookies')
	390
	391	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s=\s({.+?})\s;'
	392	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s=\s({.+?})\s*;'
	393	_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta\|</script\|\n)'
	394
	395	def _get_default_ytcfg(self, client='web'):
	396	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	397
	398	def _get_innertube_host(self, client='web'):
	399	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	400
	401	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	402	# try_get but with fallback to default ytcfg client values when present
	403	_func = lambda y: try_get(y, getter, expected_type)
	404	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	405
	406	def _extract_client_name(self, ytcfg, default_client='web'):
	407	return self._ytcfg_get_safe(
	408	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	409	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
	410
	411	def _extract_client_version(self, ytcfg, default_client='web'):
	412	return self._ytcfg_get_safe(
	413	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	414	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
	415
	416	def _extract_api_key(self, ytcfg=None, default_client='web'):
	417	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
	418
	419	def _extract_context(self, ytcfg=None, default_client='web'):
	420	context = get_first(
	421	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	422	# Enforce language and tz for extraction
	423	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	424	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	425	return context
	426
	427	_SAPISID = None
	428
	429	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	430	time_now = round(time.time())
	431	if self._SAPISID is None:
	432	yt_cookies = self._get_cookies('https://www.youtube.com')
	433	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	434	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	435	sapisid_cookie = dict_get(
	436	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	437	if sapisid_cookie and sapisid_cookie.value:
	438	self._SAPISID = sapisid_cookie.value
	439	self.write_debug('Extracted SAPISID cookie')
	440	# SAPISID cookie is required if not already present
	441	if not yt_cookies.get('SAPISID'):
	442	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	443	self._set_cookie(
	444	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	445	else:
	446	self._SAPISID = False
	447	if not self._SAPISID:
	448	return None
	449	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	450	sapisidhash = hashlib.sha1(
	451	f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
	452	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	453
	454	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	455	note='Downloading API JSON', errnote='Unable to download API page',
	456	context=None, api_key=None, api_hostname=None, default_client='web'):
	457
	458	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	459	data.update(query)
	460	real_headers = self.generate_api_headers(default_client=default_client)
	461	real_headers.update({'content-type': 'application/json'})
	462	if headers:
	463	real_headers.update(headers)
	464	return self._download_json(
	465	f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',
	466	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	467	data=json.dumps(data).encode('utf8'), headers=real_headers,
	468	query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
	469
	470	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	471	data = self._search_regex(
	472	(fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}',
	473	self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
	474	if data:
	475	return self._parse_json(data, item_id, fatal=fatal)
	476
	477	@staticmethod
	478	def _extract_session_index(*data):
	479	"""
	480	Index of current account in account list.
	481	See: https://github.com/yt-dlp/yt-dlp/pull/519
	482	"""
	483	for ytcfg in data:
	484	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	485	if session_index is not None:
	486	return session_index
	487
	488	# Deprecated?
	489	def _extract_identity_token(self, ytcfg=None, webpage=None):
	490	if ytcfg:
	491	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
	492	if token:
	493	return token
	494	if webpage:
	495	return self._search_regex(
	496	r'\bID_TOKEN["\']\s:\s["\'](.+?)["\']', webpage,
	497	'identity token', default=None, fatal=False)
	498
	499	@staticmethod
	500	def _extract_account_syncid(*args):

1

import base64

import calendar

import copy

import datetime

import functools

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

from .common import InfoExtractor, SearchInfoExtractor

19

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

25

compat_urllib_parse_urlencode,

26

compat_urllib_parse_urlparse,

27

compat_urlparse,

28

)

29

from ..jsinterp import JSInterpreter

30

from ..utils import (

NO_DEFAULT,

ExtractorError,

bug_reports_message,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

73

INNERTUBE_CLIENTS = {

74

'web': {

75

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

76

'INNERTUBE_CONTEXT': {

77

'client': {

78

'clientName': 'WEB',

79

'clientVersion': '2.20211221.00.00',

80

}

81

},

82

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

83

},

84

'web_embedded': {

85

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

86

'INNERTUBE_CONTEXT': {

87

'client': {

88

'clientName': 'WEB_EMBEDDED_PLAYER',

89

'clientVersion': '1.20211215.00.01',

90

},

91

},

92

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

93

},

94

'web_music': {

95

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

96

'INNERTUBE_HOST': 'music.youtube.com',

97

'INNERTUBE_CONTEXT': {

98

'client': {

99

'clientName': 'WEB_REMIX',

100

'clientVersion': '1.20211213.00.00',

101

}

102

},

103

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

104

},

105

'web_creator': {

106

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

107

'INNERTUBE_CONTEXT': {

108

'client': {

109

'clientName': 'WEB_CREATOR',

110

'clientVersion': '1.20211220.02.00',

111

}

112

},

113

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

114

},

115

'android': {

116

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

117

'INNERTUBE_CONTEXT': {

118

'client': {

119

'clientName': 'ANDROID',

120

'clientVersion': '16.49',

121

}

122

},

123

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

124

'REQUIRE_JS_PLAYER': False

125

},

126

'android_embedded': {

127

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

128

'INNERTUBE_CONTEXT': {

129

'client': {

130

'clientName': 'ANDROID_EMBEDDED_PLAYER',

131

'clientVersion': '16.49',

132

},

133

},

134

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

135

'REQUIRE_JS_PLAYER': False

136

},

137

'android_music': {

138

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

139

'INNERTUBE_CONTEXT': {

140

'client': {

141

'clientName': 'ANDROID_MUSIC',

142

'clientVersion': '4.57',

143

}

144

},

145

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

146

'REQUIRE_JS_PLAYER': False

147

},

148

'android_creator': {

149

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

150

'INNERTUBE_CONTEXT': {

151

'client': {

152

'clientName': 'ANDROID_CREATOR',

153

'clientVersion': '21.47',

154

},

155

},

156

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

157

'REQUIRE_JS_PLAYER': False

158

},

159

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

160

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

161

'ios': {

162

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

163

'INNERTUBE_CONTEXT': {

164

'client': {

165

'clientName': 'IOS',

166

'clientVersion': '16.46',

167

'deviceModel': 'iPhone14,3',

168

}

169

},

170

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

171

'REQUIRE_JS_PLAYER': False

172

},

173

'ios_embedded': {

174

'INNERTUBE_CONTEXT': {

175

'client': {

176

'clientName': 'IOS_MESSAGES_EXTENSION',

177

'clientVersion': '16.46',

178

'deviceModel': 'iPhone14,3',

179

},

180

},

181

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

182

'REQUIRE_JS_PLAYER': False

183

},

184

'ios_music': {

185

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

186

'INNERTUBE_CONTEXT': {

187

'client': {

188

'clientName': 'IOS_MUSIC',

189

'clientVersion': '4.57',

190

},

191

},

192

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

193

'REQUIRE_JS_PLAYER': False

194

},

195

'ios_creator': {

196

'INNERTUBE_CONTEXT': {

197

'client': {

198

'clientName': 'IOS_CREATOR',

199

'clientVersion': '21.47',

200

},

201

},

202

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

203

'REQUIRE_JS_PLAYER': False

204

},

205

# mweb has 'ultralow' formats

206

# See: https://github.com/yt-dlp/yt-dlp/pull/557

207

'mweb': {

208

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

209

'INNERTUBE_CONTEXT': {

210

'client': {

211

'clientName': 'MWEB',

212

'clientVersion': '2.20211221.01.00',

213

}

214

},

215

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

216

},

217

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

218

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

219

'tv_embedded': {

220

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

221

'INNERTUBE_CONTEXT': {

222

'client': {

223

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

224

'clientVersion': '2.0',

225

},

226

},

227

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

233

variant, *base = client_name.rsplit('.', 1)

234

if base:

235

return variant, base[0], variant

236

base, *variant = client_name.split('_', 1)

237

return client_name, base, variant[0] if variant else None

238

239

240

def build_innertube_clients():

241

THIRD_PARTY = {

242

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

243

}

244

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

245

priority = qualities(BASE_CLIENTS[::-1])

246

247

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

248

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

249

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

250

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

251

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

252

253

_, base_client, variant = _split_innertube_client(client)

254

ytcfg['priority'] = 10 * priority(base_client)

255

256

if not variant:

257

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

258

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

259

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

260

embedscreen['priority'] -= 3

261

elif variant == 'embedded':

262

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

263

ytcfg['priority'] -= 2

264

else:

265

ytcfg['priority'] -= 3

266

267

268

build_innertube_clients()

269

270

271

class YoutubeBaseInfoExtractor(InfoExtractor):

272

"""Provide base functions for Youtube extractors"""

273

274

_RESERVED_NAMES = (

275

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

281

282

# _NETRC_MACHINE = 'youtube'

283

284

# If True it will raise an error if no login info is provided

285

_LOGIN_REQUIRED = False

286

287

_INVIDIOUS_SITES = (

288

# invidious-redirect websites

289

r'(?:www\.)?redirect\.invidious\.io',

290

r'(?:(?:www|dev)\.)?invidio\.us',

291

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

292

r'(?:www\.)?invidious\.pussthecat\.org',

293

r'(?:www\.)?invidious\.zee\.li',

294

r'(?:www\.)?invidious\.ethibox\.fr',

295

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

296

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

297

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

298

# youtube-dl invidious instances list

299

r'(?:(?:www|no)\.)?invidiou\.sh',

300

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

301

r'(?:www\.)?invidious\.kabi\.tk',

302

r'(?:www\.)?invidious\.mastodon\.host',

303

r'(?:www\.)?invidious\.zapashcanon\.fr',

304

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

305

r'(?:www\.)?invidious\.tinfoil-hat\.net',

306

r'(?:www\.)?invidious\.himiko\.cloud',

307

r'(?:www\.)?invidious\.reallyancient\.tech',

308

r'(?:www\.)?invidious\.tube',

309

r'(?:www\.)?invidiou\.site',

310

r'(?:www\.)?invidious\.site',

311

r'(?:www\.)?invidious\.xyz',

312

r'(?:www\.)?invidious\.nixnet\.xyz',

313

r'(?:www\.)?invidious\.048596\.xyz',

314

r'(?:www\.)?invidious\.drycat\.fr',

315

r'(?:www\.)?inv\.skyn3t\.in',

316

r'(?:www\.)?tube\.poal\.co',

317

r'(?:www\.)?tube\.connect\.cafe',

318

r'(?:www\.)?vid\.wxzm\.sx',

319

r'(?:www\.)?vid\.mint\.lgbt',

320

r'(?:www\.)?vid\.puffyan\.us',

321

r'(?:www\.)?yewtu\.be',

322

r'(?:www\.)?yt\.elukerio\.org',

323

r'(?:www\.)?yt\.lelux\.fi',

324

r'(?:www\.)?invidious\.ggc-project\.de',

325

r'(?:www\.)?yt\.maisputain\.ovh',

326

r'(?:www\.)?ytprivate\.com',

327

r'(?:www\.)?invidious\.13ad\.de',

328

r'(?:www\.)?invidious\.toot\.koeln',

329

r'(?:www\.)?invidious\.fdn\.fr',

330

r'(?:www\.)?watch\.nettohikari\.com',

331

r'(?:www\.)?invidious\.namazso\.eu',

332

r'(?:www\.)?invidious\.silkky\.cloud',

333

r'(?:www\.)?invidious\.exonip\.de',

334

r'(?:www\.)?invidious\.riverside\.rocks',

335

r'(?:www\.)?invidious\.blamefran\.net',

336

r'(?:www\.)?invidious\.moomoo\.de',

337

r'(?:www\.)?ytb\.trom\.tf',

338

r'(?:www\.)?yt\.cyberhost\.uk',

339

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

340

r'(?:www\.)?qklhadlycap4cnod\.onion',

341

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

342

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

343

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

344

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

345

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

346

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

347

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

348

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

349

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

350

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

351

)

352

353

def _initialize_consent(self):

354

cookies = self._get_cookies('https://www.youtube.com/')

355

if cookies.get('__Secure-3PSID'):

356

return

357

consent_id = None

358

consent = cookies.get('CONSENT')

359

if consent:

360

if 'YES' in consent.value:

361

return

362

consent_id = self._search_regex(

363

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

364

if not consent_id:

365

consent_id = random.randint(100, 999)

366

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

367

368

def _initialize_pref(self):

369

cookies = self._get_cookies('https://www.youtube.com/')

370

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))

375

except ValueError:

376

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

377

pref.update({'hl': 'en', 'tz': 'UTC'})

378

self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))

379

380

def _real_initialize(self):

381

self._initialize_pref()

382

self._initialize_consent()

383

self._check_login_required()

384

385

def _check_login_required(self):

386

if (self._LOGIN_REQUIRED

387

and self.get_param('cookiefile') is None

388

and self.get_param('cookiesfrombrowser') is None):

389

self.raise_login_required('Login details are needed to download this content', method='cookies')

390

391

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'

392

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'

393

_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

394

395

def _get_default_ytcfg(self, client='web'):

396

return copy.deepcopy(INNERTUBE_CLIENTS[client])

397

398

def _get_innertube_host(self, client='web'):

399

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

400

401

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

402

# try_get but with fallback to default ytcfg client values when present

403

_func = lambda y: try_get(y, getter, expected_type)

404

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

405

406

def _extract_client_name(self, ytcfg, default_client='web'):

407

return self._ytcfg_get_safe(

408

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

409

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

410

411

def _extract_client_version(self, ytcfg, default_client='web'):

412

return self._ytcfg_get_safe(

413

ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],

414

lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)

415

416

def _extract_api_key(self, ytcfg=None, default_client='web'):

417

return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

418

419

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the 'INNERTUBE_CONTEXT' dict with language and timezone forced.

    The context is looked up in `ytcfg` first and in the default config of
    `default_client` second. Its 'client' dict is updated in place.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context.update(hl='en', timeZone='UTC', utcOffsetMinutes=0)
    return context

_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

430

time_now = round(time.time())

431

if self._SAPISID is None:

432

yt_cookies = self._get_cookies('https://www.youtube.com')

433

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

434

# See: https://github.com/yt-dlp/yt-dlp/issues/393

435

sapisid_cookie = dict_get(

436

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

437

if sapisid_cookie and sapisid_cookie.value:

438

self._SAPISID = sapisid_cookie.value

439

self.write_debug('Extracted SAPISID cookie')

440

# SAPISID cookie is required if not already present

441

if not yt_cookies.get('SAPISID'):

442

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

443

self._set_cookie(

444

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

445

else:

446

self._SAPISID = False

447

if not self._SAPISID:

448

return None

449

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

450

sapisidhash = hashlib.sha1(

451

f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()

452

return f'SAPISIDHASH {time_now}_{sapisidhash}'

453

454

def _call_api(self, ep, query, video_id, fatal=True, headers=None,

455

note='Downloading API JSON', errnote='Unable to download API page',

456

context=None, api_key=None, api_hostname=None, default_client='web'):

457

458

data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}

459

data.update(query)

460

real_headers = self.generate_api_headers(default_client=default_client)

461

real_headers.update({'content-type': 'application/json'})

462

if headers:

463

real_headers.update(headers)

464

return self._download_json(

465

f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',

466

video_id=video_id, fatal=fatal, note=note, errnote=errnote,

467

data=json.dumps(data).encode('utf8'), headers=real_headers,

468

query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})

469

470

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find the ytInitialData JSON in `webpage` and return it parsed.

    Returns None when the search yields nothing; `fatal` is passed to both
    the regex search and the JSON parser.
    """
    # Prefer the match anchored by a known boundary, then the bare pattern
    patterns = (
        fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}',
        self._YT_INITIAL_DATA_RE,
    )
    raw_json = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw_json:
        return None
    return self._parse_json(raw_json, item_id, fatal=fatal)

476

477

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts
    to an int, or None if there is none.
    """
    for cfg in data:
        index = int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX']))
        if index is None:
            continue
        return index

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):

490

if ytcfg:

491

token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)

if token:

return token

if webpage:

return self._search_regex(

496

r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,

497

'identity token', default=None, fatal=False)

498

499

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated:
            return delegated
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value, handed to get_first as one branching path
    visitor_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_paths], expected_type=str)

@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

531

532

def extract_ytcfg(self, video_id, webpage):
    """Return the dict passed to `ytcfg.set({...});` in `webpage`.

    Returns an empty dict when `webpage` is falsy, when no `ytcfg.set(...)`
    call is found, or when the parsed result is falsy.
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped literals; an unescaped `$`
    # in their place anchors to end-of-string and the pattern can never match.
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_cfg, video_id, fatal=False) or {}

539

540

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an API request.

    Explicit arguments take precedence over values extracted from `ytcfg`.
    Headers whose value is None are left out of the returned dict.
    """
    host = api_hostname or self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With an account sync id but no known session index, 0 is sent
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    return {name: value for name, value in headers.items() if value is not None}

564

565

def _download_ytcfg(self, client, video_id):

566

url = {

567

'web': 'https://www.youtube.com',

568

'web_music': 'https://music.youtube.com',

569

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(

574

url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')

575

return self.extract_ytcfg(video_id, webpage) or {}

576

577

@staticmethod

578

def _build_api_continuation_query(continuation, ctp=None):

579

query = {

580

'continuation': continuation

581

}

582

# TODO: Inconsistency with clickTrackingParams.

583

# Currently we have a fixed ctp contained within context (from ytcfg)

584

# and a ctp in root query for continuation.

585

if ctp:

586

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from next/reload continuation data in `renderer`.

    Returns None when no such data, or no 'continuation' token, is present.
    """
    data_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, data_getters, dict)
    if not continuation_data:
        return
    token = continuation_data.get('continuation')
    if not token:
        return
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

601

602

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or carries no token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

611

612

@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for `renderer`, or None if it has none.

    Next/reload continuation data is preferred; otherwise the entries under
    'contents' and 'items' are scanned for a continuationItemRenderer.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in (try_get(renderer, lambda x, k=key: x[k], list) or [])]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(entry, endpoint_getters, dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query

@classmethod
def _extract_alerts(cls, data):
    """Yield an (alert_type, message) pair for each usable alert in `data['alerts']`.

    Entries that are not dicts, alerts without a 'type', and alerts without
    any text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Guard the inner level like the outer one: `.get` on a
            # non-dict value would raise AttributeError
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

645

646

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

647

errors = []

648

warnings = []

649

for alert_type, alert_message in alerts:

650

if alert_type.lower() == 'error' and fatal:

651

errors.append([alert_type, alert_message])

652

else:

653

warnings.append([alert_type, alert_message])

654

655

for alert_type, alert_message in (warnings + errors[:-1]):

656

self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

657

if errors:

658

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

659

660

def _extract_and_report_alerts(self, data, *args, **kwargs):

661

return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

662

663

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadata badge labels found in `renderer['badges']`."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in badge_list)
    # Badges without a (non-empty) label are ignored
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of the given paths, else None.

    For every candidate object, 'simpleText' is preferred; otherwise the
    'text' of its 'runs' (or of the object itself, if it is a list) is
    joined, limited to the first `max_runs` runs when that is truthy.
    With no paths given, `data` itself is the only candidate.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                # A non-branching path yields a single object rather than a list of them
                candidates = [candidates]
        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            run_limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:run_limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined

def _get_count(self, data, *path_list):
    """Return the count parsed from the text at the given paths (see `_get_text`)."""
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed
    # Fallback: leading run of digits/commas once all whitespace is removed
    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(
        self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width'
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.split('?')[0]
            results.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results

@staticmethod

def extract_relative_time(relative_time_text):

727

"""

728

Extracts a relative time from string and converts to dt object

729

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

"""

if mobj:

start = mobj.group('start')

734

if start:

735

return datetime_from_str(start)

736

try:

737

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _extract_time_text(self, renderer, *path_list):
    """@returns (timestamp, time_text)

    The text is read with `_get_text`. It is first tried as a relative time,
    then as an absolute date; a warning is reported when a non-empty text
    cannot be parsed either way.
    """
    text = self._get_text(renderer, *path_list) or ''
    relative_dt = self.extract_relative_time(text)
    timestamp = (
        calendar.timegm(relative_dt.timetuple())
        if isinstance(relative_dt, datetime.datetime) else None)

    if timestamp is None:
        # Try the whole text as a date, then a date-looking part of it
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_part = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text

759

760

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API with retries and return the response.

    At most `extractor_retries` (param, default 3) retries are made after the
    first attempt. A retry happens on network errors other than HTTP 403/429,
    on "unknown error" alerts in the response, and when none of
    `check_get_keys` is present in the response.

    @param check_get_keys  keys of which at least one must be in the response
                           for it to be considered complete
    @param fatal           when False, failures are reported as warnings and
                           None is returned instead of raising
    Context and API key are taken from `ytcfg`, falling back to the defaults
    of `default_client`; the other arguments are passed on to `_call_api`.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                if isinstance(e.cause, compat_HTTPError):
                    # Surface the error message from a JSON error body, if any
                    first_bytes = e.cause.read(512)
                    if not is_html(first_bytes):
                        yt_error = try_get(
                            self._parse_json(
                                self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                # Only retry while attempts remain; on the last attempt the
                # error must be raised/reported below, not silently dropped
                # by leaving the loop and returning the erroneous response.
                if 'unknown error' in e.msg.lower() and count < retries:
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response

@staticmethod

def is_music_url(url):

834

return re.match(r'https?://music\.youtube\.com/', url) is not None

835

836

def _extract_video(self, renderer):
    """Build a 'url'-type result dict for YoutubeIE from a video renderer dict."""
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(duration_text)
    if duration is None:
        # Fall back to the duration embedded in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    # Shorts are recognised by the overlay style or by the navigation URL
    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        # The date is only filled in when 'approximate_date' is configured
        'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
                        if self._configuration_arg('approximate_date', ie_key='youtubetab')
                        else None),
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

893

IE_DESC = 'YouTube'

894

_VALID_URL = r"""(?x)^

895

(

896

(?:https?://|//) # http(s):// or protocol-independent URL

897

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

898

(?:www\.)?deturl\.com/www\.youtube\.com|

899

(?:www\.)?pwnyoutube\.com|

900

(?:www\.)?hooktube\.com|

901

(?:www\.)?yourepeat\.com|

902

tube\.majestyc\.net|

903

%(invidious)s|

904

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

905

(?:.*?\#/)? # handle anchor (#/) redirect urls

906

(?: # the various things that can precede the ID:

907

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

908

|(?: # or the v= param in all its forms

909

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

910

(?:\?|\#!?) # the params delimiter ? or # or #!

911

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

917

vid\.plus| # or vid.plus/xxxx

918

zwearz\.com/watch| # or zwearz.com/watch/xxxx

919

%(invidious)s

920

)/

921

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

922

)

923

)? # all until now is optional -> you can pass the naked ID

924

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

925

(?(1).+)? # if we found the ID, everything can follow

926

(?:\#|$)""" % {

927

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

928

}

929

_PLAYER_INFO_RE = (

930

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

931

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

932

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

933

)

934

_formats = {

935

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

936

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

937

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

938

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

939

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

940

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

941

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

942

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

943

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

944

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

945

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

946

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

947

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

948

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

949

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

950

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

951

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

952

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

957

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

958

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

959

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

960

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

961

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

962

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

963

964

# Apple HTTP Live Streaming

965

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

966

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

967

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

968

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

969

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

970

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

971

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

972

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

973

974

# DASH mp4 video

975

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

976

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

977

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

978

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

979

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

980

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

981

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

982

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

983

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

984

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

985

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

986

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

987

988

# Dash mp4 audio

989

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

990

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

991

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

992

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

993

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

994

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

995

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

996

997

# Dash webm

998

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

999

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1000

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1001

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1002

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1003

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1004

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1005

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1006

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1007

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1008

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1009

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1010

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1011

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1012

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1013

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1014

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1015

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1016

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1017

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1018

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1019

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1020

1021

# Dash webm audio

1022

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1023

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1024

1025

# Dash webm audio with opus inside

1026

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1027

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1028

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1029

1030

# RTMP (unnamed)

1031

'_rtmp': {'protocol': 'rtmp'},

1032

1033

# av01 video only formats sometimes served with "unknown" codecs

1034

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1035

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1036

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1037

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1038

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1039

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1040

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1041

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1042

}

1043

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1055

'uploader': 'Philipp Hagemeister',

1056

'uploader_id': 'phihag',

1057

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1058

'channel': 'Philipp Hagemeister',

1059

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1060

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1061

'upload_date': '20121002',

1062

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1063

'categories': ['Science & Technology'],

1064

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1069

'playable_in_embed': True,

1070

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1071

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1080

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1085

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1086

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1087

'uploader': 'SET India',

1088

'uploader_id': 'setindia',

1089

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1090

'age_limit': 18,

1091

},

1092

'skip': 'Private video',

1093

},

1094

{

1095

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1096

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1101

'uploader': 'Philipp Hagemeister',

1102

'uploader_id': 'phihag',

1103

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1104

'channel': 'Philipp Hagemeister',

1105

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1106

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1107

'upload_date': '20121002',

1108

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1109

'categories': ['Science & Technology'],

1110

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1115

'playable_in_embed': True,

1116

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1117

'live_status': 'not_live',

1118

'age_limit': 0,

1119

'channel_follower_count': int

1120

},

1121

'params': {

1122

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1127

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1132

'uploader_id': '8KVIDEO',

1133

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1134

'description': '',

1135

'uploader': '8KVIDEO',

1136

'title': 'UHDTV TEST 8K VIDEO.mp4'

1137

},

1138

'params': {

1139

'youtube_include_dash_manifest': True,

1140

'format': '141',

1141

},

1142

'skip': 'format 141 not served anymore',

1143

},

1144

# DASH manifest with encrypted signature

1145

{

1146

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1151

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1152

'duration': 244,

1153

'uploader': 'AfrojackVEVO',

1154

'uploader_id': 'AfrojackVEVO',

1155

'upload_date': '20131011',

1156

'abr': 129.495,

1157

'like_count': int,

1158

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1159

'playable_in_embed': True,

1160

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1161

'view_count': int,

1162

'track': 'The Spark',

1163

'live_status': 'not_live',

1164

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1165

'channel': 'Afrojack',

1166

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1167

'tags': 'count:19',

1168

'availability': 'public',

1169

'categories': ['Music'],

1170

'age_limit': 0,

1171

'alt_title': 'The Spark',

1172

'channel_follower_count': int

1173

},

1174

'params': {

1175

'youtube_include_dash_manifest': True,

1176

'format': '141/bestaudio[ext=m4a]',

1177

},

1178

},

1179

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1180

{

1181

'note': 'Embed allowed age-gate video',

1182

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1187

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1188

'duration': 142,

1189

'uploader': 'The Witcher',

1190

'uploader_id': 'WitcherGame',

1191

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1192

'upload_date': '20140605',

1193

'age_limit': 18,

1194

'categories': ['Gaming'],

1195

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1196

'availability': 'needs_auth',

1197

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1198

'like_count': int,

1199

'channel': 'The Witcher',

1200

'live_status': 'not_live',

1201

'tags': 'count:17',

1202

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1203

'playable_in_embed': True,

1204

'view_count': int,

1205

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1210

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1215

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1216

'upload_date': '20200408',

1217

'uploader_id': 'FlyingKitty900',

1218

'uploader': 'FlyingKitty',

1219

'age_limit': 18,

1220

'availability': 'needs_auth',

1221

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1222

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1223

'channel': 'FlyingKitty',

1224

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1225

'view_count': int,

1226

'categories': ['Entertainment'],

1227

'live_status': 'not_live',

1228

'tags': ['Flyingkitty', 'godzilla 2'],

1229

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1230

'like_count': int,

1231

'duration': 177,

1232

'playable_in_embed': True,

1233

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1238

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1239

'info_dict': {

1240

'id': 'Tq92D6wQ1mg',

1241

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1242

'ext': 'mp4',

1243

'upload_date': '20191228',

1244

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1245

'uploader': 'Projekt Melody',

1246

'description': 'md5:17eccca93a786d51bc67646756894066',

1247

'age_limit': 18,

1248

'like_count': int,

1249

'availability': 'needs_auth',

1250

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1251

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1252

'view_count': int,

1253

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1254

'channel': 'Projekt Melody',

1255

'live_status': 'not_live',

1256

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1257

'playable_in_embed': True,

1258

'categories': ['Entertainment'],

1259

'duration': 106,

1260

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1261

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1266

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1271

'uploader': 'Herr Lurik',

1272

'uploader_id': 'st3in234',

1273

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1274

'upload_date': '20130730',

1275

'track': 'Such mich find mich',

1276

'age_limit': 0,

1277

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1278

'like_count': int,

1279

'playable_in_embed': False,

1280

'creator': 'OOMPH!',

1281

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1282

'view_count': int,

1283

'alt_title': 'Such mich find mich',

1284

'duration': 210,

1285

'channel': 'Herr Lurik',

1286

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1287

'categories': ['Music'],

1288

'availability': 'public',

1289

'uploader_url': 'http://www.youtube.com/user/st3in234',

1290

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1291

'live_status': 'not_live',

1292

'artist': 'OOMPH!',

1293

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1298

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1299

'only_matching': True,

1300

},

1301

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1302

# YouTube Red ad is not captured for creator

1303

{

1304

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1310

'uploader_id': 'deadmau5',

1311

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1312

'creator': 'deadmau5',

1313

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1314

'uploader': 'deadmau5',

1315

'title': 'Deadmau5 - Some Chords (HD)',

1316

'alt_title': 'Some Chords',

1317

'availability': 'public',

1318

'tags': 'count:14',

1319

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1320

'view_count': int,

1321

'live_status': 'not_live',

1322

'channel': 'deadmau5',

1323

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1324

'like_count': int,

1325

'track': 'Some Chords',

1326

'artist': 'deadmau5',

1327

'playable_in_embed': True,

1328

'age_limit': 0,

1329

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1330

'categories': ['Music'],

1331

'album': 'Some Chords',

1332

'channel_follower_count': int

1333

},

1334

'expected_warnings': [

1335

'DASH manifest missing',

1336

]

1337

},

1338

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1339

{

1340

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1346

'uploader_id': 'olympic',

1347

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1348

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1349

'uploader': 'Olympics',

1350

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1351

'like_count': int,

1352

'release_timestamp': 1343767800,

1353

'playable_in_embed': True,

1354

'categories': ['Sports'],

1355

'release_date': '20120731',

1356

'channel': 'Olympics',

1357

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1358

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1359

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1360

'age_limit': 0,

1361

'availability': 'public',

1362

'live_status': 'was_live',

1363

'view_count': int,

1364

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1365

'channel_follower_count': int

1366

},

1367

'params': {

1368

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1378

'duration': 85,

1379

'upload_date': '20110310',

1380

'uploader_id': 'AllenMeow',

1381

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1382

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1383

'uploader': '孫ᄋᄅ',

1384

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1385

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1390

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1391

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1392

'view_count': int,

1393

'categories': ['People & Blogs'],

1394

'like_count': int,

1395

'live_status': 'not_live',

1396

'availability': 'unlisted',

1397

'channel_follower_count': int

1398

},

1399

},

1400

# url_encoded_fmt_stream_map is empty string

1401

{

1402

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1407

'description': '',

1408

'upload_date': '20150404',

1409

'uploader_id': 'spbelect',

1410

'uploader': 'Наблюдатели Петербурга',

1411

},

1412

'params': {

1413

'skip_download': 'requires avconv',

1414

},

1415

'skip': 'This live event has ended.',

1416

},

1417

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1418

{

1419

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1424

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1425

'duration': 220,

1426

'upload_date': '20150625',

1427

'uploader_id': 'dorappi2000',

1428

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1429

'uploader': 'dorappi2000',

1430

'formats': 'mincount:31',

1431

},

1432

'skip': 'not actual anymore',

1433

},

1434

# DASH manifest with segment_list

1435

{

1436

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1437

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1442

'uploader': 'Airtek',

1443

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1444

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1445

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1446

},

1447

'params': {

1448

'youtube_include_dash_manifest': True,

1449

'format': '135', # bestvideo

1450

},

1451

'skip': 'This live event has ended.',

1452

},

1453

{

1454

# Multifeed videos (multiple cameras), URL is for Main Camera

1455

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1456

'info_dict': {

1457

'id': 'jvGDaLqkpTg',

1458

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1459

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1466

'description': 'md5:e03b909557865076822aa169218d6a5d',

1467

'duration': 10643,

1468

'upload_date': '20161111',

1469

'uploader': 'Team PGP',

1470

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1471

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1478

'description': 'md5:e03b909557865076822aa169218d6a5d',

1479

'duration': 10991,

1480

'upload_date': '20161111',

1481

'uploader': 'Team PGP',

1482

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1483

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1490

'description': 'md5:e03b909557865076822aa169218d6a5d',

1491

'duration': 10995,

1492

'upload_date': '20161111',

1493

'uploader': 'Team PGP',

1494

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1495

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1502

'description': 'md5:e03b909557865076822aa169218d6a5d',

1503

'duration': 10990,

1504

'upload_date': '20161111',

1505

'uploader': 'Team PGP',

1506

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1507

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1512

},

1513

'skip': 'Not multifeed anymore',

1514

},

1515

{

1516

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1517

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1518

'info_dict': {

1519

'id': 'gVfLd0zydlo',

1520

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1521

},

1522

'playlist_count': 2,

1523

'skip': 'Not multifeed anymore',

1524

},

1525

{

1526

'url': 'https://vid.plus/FlRa-iH7PGw',

1527

'only_matching': True,

1528

},

1529

{

1530

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1531

'only_matching': True,

1532

},

1533

{

1534

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1535

# Also tests cut-off URL expansion in video description (see

1536

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1537

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1538

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1543

'alt_title': 'Dark Walk',

1544

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1545

'duration': 133,

1546

'upload_date': '20151119',

1547

'uploader_id': 'IronSoulElf',

1548

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1549

'uploader': 'IronSoulElf',

1550

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1551

'track': 'Dark Walk',

1552

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1553

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1554

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1555

'categories': ['Film & Animation'],

1556

'view_count': int,

1557

'live_status': 'not_live',

1558

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1559

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1560

'tags': 'count:13',

1561

'availability': 'public',

1562

'channel': 'IronSoulElf',

1563

'playable_in_embed': True,

1564

'like_count': int,

1565

'age_limit': 0,

1566

'channel_follower_count': int

1567

},

1568

'params': {

1569

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1574

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1575

'only_matching': True,

1576

},

1577

{

1578

# Video with yt:stretch=17:0

1579

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1584

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1585

'upload_date': '20151107',

1586

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1587

'uploader': 'CH GAMER DROID',

1588

},

1589

'params': {

1590

'skip_download': True,

1591

},

1592

'skip': 'This video does not exist.',

1593

},

1594

{

1595

# Video with incomplete 'yt:stretch=16:'

1596

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1597

'only_matching': True,

1598

},

1599

{

1600

# Video licensed under Creative Commons

1601

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1606

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1607

'duration': 721,

1608

'upload_date': '20150128',

1609

'uploader_id': 'BerkmanCenter',

1610

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1611

'uploader': 'The Berkman Klein Center for Internet & Society',

1612

'license': 'Creative Commons Attribution license (reuse allowed)',

1613

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1614

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1615

'like_count': int,

1616

'age_limit': 0,

1617

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1618

'channel': 'The Berkman Klein Center for Internet & Society',

1619

'availability': 'public',

1620

'view_count': int,

1621

'categories': ['Education'],

1622

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1623

'live_status': 'not_live',

1624

'playable_in_embed': True,

1625

'channel_follower_count': int

1626

},

1627

'params': {

1628

'skip_download': True,

},

},

{

# Channel-like uploader_url

1633

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1638

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1639

'duration': 4060,

1640

'upload_date': '20151120',

1641

'uploader': 'Bernie Sanders',

1642

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1643

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1644

'license': 'Creative Commons Attribution license (reuse allowed)',

1645

'playable_in_embed': True,

1646

'tags': 'count:12',

1647

'like_count': int,

1648

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1649

'age_limit': 0,

1650

'availability': 'public',

1651

'categories': ['News & Politics'],

1652

'channel': 'Bernie Sanders',

1653

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1654

'view_count': int,

1655

'live_status': 'not_live',

1656

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1657

'channel_follower_count': int

1658

},

1659

'params': {

1660

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1665

'only_matching': True,

1666

},

1667

{

1668

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1669

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1670

'only_matching': True,

1671

},

1672

{

1673

# Rental video preview

1674

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1679

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1680

'upload_date': '20150811',

1681

'uploader': 'FlixMatrix',

1682

'uploader_id': 'FlixMatrixKaravan',

1683

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1684

'license': 'Standard YouTube License',

1685

},

1686

'params': {

1687

'skip_download': True,

1688

},

1689

'skip': 'This video is not available.',

1690

},

1691

{

1692

# YouTube Red video with episode data

1693

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1698

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1699

'duration': 2085,

1700

'upload_date': '20170118',

1701

'uploader': 'Vsauce',

1702

'uploader_id': 'Vsauce',

1703

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1704

'series': 'Mind Field',

1705

'season_number': 1,

1706

'episode_number': 1,

1707

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1708

'tags': 'count:12',

1709

'view_count': int,

1710

'availability': 'public',

1711

'age_limit': 0,

1712

'channel': 'Vsauce',

1713

'episode': 'Episode 1',

1714

'categories': ['Entertainment'],

1715

'season': 'Season 1',

1716

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1717

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1718

'like_count': int,

1719

'playable_in_embed': True,

1720

'live_status': 'not_live',

1721

'channel_follower_count': int

1722

},

1723

'params': {

1724

'skip_download': True,

1725

},

1726

'expected_warnings': [

1727

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1732

# as inappropriate or offensive to some audiences.

1733

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1738

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1739

'duration': 965,

1740

'upload_date': '20140124',

1741

'uploader': 'New Century Foundation',

1742

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1743

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1744

},

1745

'params': {

1746

'skip_download': True,

1747

},

1748

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1753

'only_matching': True,

1754

},

1755

{

1756

# geo restricted to JP

1757

'url': 'sJL6WA-aGkQ',

1758

'only_matching': True,

1759

},

1760

{

1761

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1762

'only_matching': True,

1763

},

1764

{

1765

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1766

'only_matching': True,

1767

},

1768

{

1769

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1770

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1771

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1776

'only_matching': True,

1777

},

1778

{

1779

# Video with unsupported adaptive stream type formats

1780

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1785

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1786

'duration': 433,

1787

'upload_date': '20130923',

1788

'uploader': 'Amelia Putri Harwita',

1789

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1790

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1791

'formats': 'maxcount:10',

1792

},

1793

'params': {

1794

'skip_download': True,

1795

'youtube_include_dash_manifest': False,

1796

},

1797

'skip': 'not actual anymore',

1798

},

1799

{

1800

# YouTube Music auto-generated description

1801

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1806

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1807

'upload_date': '20190312',

1808

'uploader': 'Stephen - Topic',

1809

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1810

'artist': 'Stephen',

1811

'track': 'Voyeur Girl',

1812

'album': 'it\'s too much love to know my dear',

1813

'release_date': '20190313',

1814

'release_year': 2019,

1815

'alt_title': 'Voyeur Girl',

1816

'view_count': int,

1817

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1818

'playable_in_embed': True,

1819

'like_count': int,

1820

'categories': ['Music'],

1821

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1822

'channel': 'Stephen',

1823

'availability': 'public',

1824

'creator': 'Stephen',

1825

'duration': 169,

1826

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1827

'age_limit': 0,

1828

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1829

'tags': 'count:11',

1830

'live_status': 'not_live',

1831

'channel_follower_count': int

1832

},

1833

'params': {

1834

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1839

'only_matching': True,

1840

},

1841

{

1842

# invalid -> valid video id redirection

1843

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1848

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1849

'upload_date': '20090125',

1850

'uploader': 'Prochorowka',

1851

'uploader_id': 'Prochorowka',

1852

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1853

'artist': 'Panjabi MC',

1854

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1855

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1856

},

1857

'params': {

1858

'skip_download': True,

1859

},

1860

'skip': 'Video unavailable',

1861

},

1862

{

1863

# empty description results in an empty string

1864

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1871

'uploader_id': 'ElevageOrVert',

1872

'uploader': 'ElevageOrVert',

1873

'view_count': int,

1874

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1875

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1876

'like_count': int,

1877

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1878

'tags': [],

1879

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1880

'availability': 'public',

1881

'age_limit': 0,

1882

'categories': ['Pets & Animals'],

1883

'duration': 7,

1884

'playable_in_embed': True,

1885

'live_status': 'not_live',

1886

'channel': 'ElevageOrVert',

1887

'channel_follower_count': int

1888

},

1889

'params': {

1890

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1895

# see [2] for an example with '};' inside ytInitialPlayerResponse

1896

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1897

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1898

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1903

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1904

'upload_date': '20130831',

1905

'uploader_id': 'kudvenkat',

1906

'uploader': 'kudvenkat',

1907

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1908

'like_count': int,

1909

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1910

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1911

'live_status': 'not_live',

1912

'categories': ['Education'],

1913

'availability': 'public',

1914

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1915

'tags': 'count:12',

1916

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1921

'channel_follower_count': int

1922

},

1923

'params': {

1924

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1929

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1930

'only_matching': True,

1931

},

1932

{

1933

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1934

'only_matching': True,

1935

},

1936

{

1937

# https://github.com/ytdl-org/youtube-dl/pull/28094

1938

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1944

'upload_date': '20141120',

1945

'uploader': 'The Cinematic Orchestra - Topic',

1946

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1947

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1948

'artist': 'The Cinematic Orchestra',

1949

'track': 'Burn Out',

1950

'album': 'Every Day',

1951

'like_count': int,

1952

'live_status': 'not_live',

1953

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1958

'creator': 'The Cinematic Orchestra',

1959

'channel': 'The Cinematic Orchestra',

1960

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1961

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1962

'availability': 'public',

1963

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1964

'categories': ['Music'],

1965

'playable_in_embed': True,

1966

'channel_follower_count': int

1967

},

1968

'params': {

1969

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1974

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1975

'only_matching': True,

1976

},

1977

{

1978

# controversial video, requires bpctr/contentCheckOk

1979

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1984

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1985

'uploader': 'CBS Mornings',

1986

'uploader_id': 'CBSThisMorning',

1987

'upload_date': '20140716',

1988

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1989

'duration': 170,

1990

'categories': ['News & Politics'],

1991

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1992

'view_count': int,

1993

'channel': 'CBS Mornings',

1994

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

1995

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

1996

'age_limit': 18,

1997

'availability': 'needs_auth',

1998

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

1999

'like_count': int,

2000

'live_status': 'not_live',

2001

'playable_in_embed': True,

2002

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2007

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2012

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2013

'upload_date': '20201120',

2014

'uploader': 'Walk around Japan',

2015

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2016

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2017

'duration': 1456,

2018

'categories': ['Travel & Events'],

2019

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2020

'view_count': int,

2021

'channel': 'Walk around Japan',

2022

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2023

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2024

'age_limit': 0,

2025

'availability': 'public',

2026

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2027

'live_status': 'not_live',

2028

'playable_in_embed': True,

2029

'channel_follower_count': int

2030

},

2031

'params': {

2032

'skip_download': True,

2033

},

2034

}, {

2035

# Has multiple audio streams

2036

'url': 'WaOKSUlf4TM',

2037

'only_matching': True

2038

}, {

2039

# Requires Premium: has format 141 when requested using YTM url

2040

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2041

'only_matching': True

2042

}, {

2043

# multiple subtitles with same lang_code

2044

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2045

'only_matching': True,

2046

}, {

2047

# Force use android client fallback

2048

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2049

'info_dict': {

2050

'id': 'YOelRv7fMxY',

2051

'title': 'DIGGING A SECRET TUNNEL Part 1',

2052

'ext': '3gp',

2053

'upload_date': '20210624',

2054

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2055

'uploader': 'colinfurze',

2056

'uploader_id': 'colinfurze',

2057

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2058

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2059

'duration': 596,

2060

'categories': ['Entertainment'],

2061

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2062

'view_count': int,

2063

'channel': 'colinfurze',

2064

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2065

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2066

'age_limit': 0,

2067

'availability': 'public',

2068

'like_count': int,

2069

'live_status': 'not_live',

2070

'playable_in_embed': True,

2071

'channel_follower_count': int

2072

},

2073

'params': {

2074

'format': '17', # 3gp format available on android

2075

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2080

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2081

'only_matching': True,

2082

'params': {

2083

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2088

'only_matching': True,

2089

}, {

2090

'note': 'Storyboards',

2091

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2097

'uploader_id': 'scishow',

2098

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2099

'upload_date': '20140324',

2100

'uploader': 'SciShow',

2101

'like_count': int,

2102

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2103

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2104

'view_count': int,

2105

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2106

'playable_in_embed': True,

2107

'tags': 'count:12',

2108

'uploader_url': 'http://www.youtube.com/user/scishow',

2109

'availability': 'public',

2110

'channel': 'SciShow',

2111

'live_status': 'not_live',

2112

'duration': 248,

2113

'categories': ['Education'],

2114

'age_limit': 0,

2115

'channel_follower_count': int

2116

}, 'params': {'format': 'mhtml', 'skip_download': True}

2117

}, {

2118

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2119

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2124

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2125

'uploader': 'Leon Nguyen',

2126

'uploader_id': 'VNSXIII',

2127

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2128

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2129

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2134

'tags': 'count:23',

2135

'playable_in_embed': True,

2136

'live_status': 'not_live',

2137

'upload_date': '20220103',

2138

'like_count': int,

2139

'availability': 'public',

2140

'channel': 'Leon Nguyen',

2141

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2142

'channel_follower_count': int

2143

}

2144

}, {

2145

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2146

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2151

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2152

'uploader': 'Quackity',

2153

'uploader_id': 'QuackityHQ',

2154

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2155

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2156

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2161

'tags': 'count:26',

2162

'playable_in_embed': True,

2163

'live_status': 'not_live',

2164

'release_timestamp': 1641172509,

2165

'release_date': '20220103',

2166

'upload_date': '20220103',

2167

'like_count': int,

2168

'availability': 'public',

2169

'channel': 'Quackity',

2170

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2171

'channel_follower_count': int

2172

}

2173

},

2174

{ # continuous livestream. Microformat upload date should be preferred.

2175

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2176

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2177

'info_dict': {

2178

'id': 'kgx4WGK0oNU',

2179

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2180

'ext': 'mp4',

2181

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2182

'availability': 'public',

2183

'age_limit': 0,

2184

'release_timestamp': 1637975704,

2185

'upload_date': '20210619',

2186

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2187

'live_status': 'is_live',

2188

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2189

'uploader': '阿鲍Abao',

2190

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2191

'channel': 'Abao in Tokyo',

2192

'channel_follower_count': int,

2193

'release_date': '20211127',

2194

'tags': 'count:39',

2195

'categories': ['People & Blogs'],

2196

'like_count': int,

2197

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2198

'view_count': int,

2199

'playable_in_embed': True,

2200

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2201

},

2202

'params': {'skip_download': True}

2203

}, {

2204

# Story. Requires specific player params to work.

2205

# Note: stories get removed after some period of time

2206

'url': 'https://www.youtube.com/watch?v=yN3x1t3sieA',

'info_dict': {

'id': 'yN3x1t3sieA',

'ext': 'mp4',

'uploader': 'Linus Tech Tips',

2211

'duration': 13,

2212

'channel': 'Linus Tech Tips',

2213

'playable_in_embed': True,

2214

'tags': [],

2215

'age_limit': 0,

2216

'uploader_url': 'http://www.youtube.com/user/LinusTechTips',

2217

'upload_date': '20220402',

2218

'thumbnail': 'https://i.ytimg.com/vi_webp/yN3x1t3sieA/maxresdefault.webp',

2219

'title': 'Story',

2220

'live_status': 'not_live',

2221

'uploader_id': 'LinusTechTips',

2222

'view_count': int,

2223

'description': '',

2224

'channel_id': 'UCXuqSBlHAE6Xw-yeJA0Tunw',

2225

'categories': ['Science & Technology'],

2226

'channel_url': 'https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw',

2227

'availability': 'unlisted',

}

}

]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is judged by the parent class.
    """
    # NOTE(review): parse_qs is deliberately imported inside the method in
    # the original; the reason is not visible from here, so it is kept.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super().suitable(url)

2240

2241

def __init__(self, *args, **kwargs):
    """Initialise the parent extractor, then create empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # Holds signature/nsig functions and already decrypted nsig values
    # (filled by _decrypt_signature and _decrypt_nsig)
    self._player_cache = {}
    # player id -> downloaded player JS (filled by _load_player)
    self._code_cache = {}

2245

2246

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):

2247

lock = threading.Lock()

2248

2249

is_live = True

2250

start_time = time.time()

2251

formats = [f for f in formats if f.get('is_from_start')]

2252

2253

def refetch_manifest(format_id, delay):

2254

nonlocal formats, start_time, is_live

2255

if time.time() <= start_time + delay:

2256

return

2257

2258

_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

2259

video_details = traverse_obj(

2260

prs, (..., 'videoDetails'), expected_type=dict, default=[])

2261

microformats = traverse_obj(

2262

prs, (..., 'microformat', 'playerMicroformatRenderer'),

2263

expected_type=dict, default=[])

2264

_, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)

2265

start_time = time.time()

2266

2267

def mpd_feed(format_id, delay):

2268

"""

2269

@returns (manifest_url, manifest_stream_number, is_live) or None

2270

"""

2271

with lock:

2272

refetch_manifest(format_id, delay)

2273

2274

f = next((f for f in formats if f['format_id'] == format_id), None)

2275

if not f:

2276

if not is_live:

2277

self.to_screen(f'{video_id}: Video is no longer live')

2278

else:

2279

self.report_warning(

2280

f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')

2281

return None

2282

return f['manifest_url'], f['manifest_stream_number'], is_live

for f in formats:

f['is_live'] = True

f['protocol'] = 'http_dash_segments_generator'

2287

f['fragments'] = functools.partial(

2288

self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)

2289

2290

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):

2291

FETCH_SPAN, MAX_DURATION = 5, 432000

2292

2293

mpd_url, stream_number, is_live = None, None, True

2294

2295

begin_index = 0

2296

download_start_time = ctx.get('start') or time.time()

2297

2298

lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION

2299

if lack_early_segments:

2300

self.report_warning(bug_reports_message(

2301

'Starting download from the last 120 hours of the live stream since '

2302

'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

2303

lack_early_segments = True

2304

2305

known_idx, no_fragment_score, last_segment_url = begin_index, 0, None

2306

fragments, fragment_base_url = None, None

2307

2308

def _extract_sequence_from_mpd(refresh_sequence, immediate):

2309

nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url

2310

# Obtain from MPD's maximum seq value

2311

old_mpd_url = mpd_url

2312

last_error = ctx.pop('last_error', None)

2313

expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403

2314

mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)

2315

or (mpd_url, stream_number, False))

2316

if not refresh_sequence:

2317

if expire_fast and not is_live:

2318

return False, last_seq

2319

elif old_mpd_url == mpd_url:

2320

return True, last_seq

2321

try:

2322

fmts, _ = self._extract_mpd_formats_and_subtitles(

2323

mpd_url, None, note=False, errnote=False, fatal=False)

2324

except ExtractorError:

2325

fmts = None

2326

if not fmts:

2327

no_fragment_score += 2

2328

return False, last_seq

2329

fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)

2330

fragments = fmt_info['fragments']

2331

fragment_base_url = fmt_info['fragment_base_url']

2332

assert fragment_base_url

2333

2334

_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))

2335

return True, _last_seq

2336

2337

while is_live:

2338

fetch_time = time.time()

2339

if no_fragment_score > 30:

2340

return

2341

if last_segment_url:

2342

# Obtain from "X-Head-Seqnum" header value from each segment

2343

try:

2344

urlh = self._request_webpage(

2345

last_segment_url, None, note=False, errnote=False, fatal=False)

2346

except ExtractorError:

2347

urlh = None

2348

last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))

2349

if last_seq is None:

2350

no_fragment_score += 2

2351

last_segment_url = None

2352

continue

2353

else:

2354

should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)

2355

no_fragment_score += 2

2356

if not should_continue:

2357

continue

2358

2359

if known_idx > last_seq:

2360

last_segment_url = None

continue

last_seq += 1

if begin_index < 0 and known_idx < 0:

2366

# skip from the start when it's negative value

2367

known_idx = last_seq + begin_index

2368

if lack_early_segments:

2369

known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))

2370

try:

2371

for idx in range(known_idx, last_seq):

2372

# do not update sequence here or you'll get skipped some part of it

2373

should_continue, _ = _extract_sequence_from_mpd(False, False)

2374

if not should_continue:

2375

known_idx = idx - 1

2376

raise ExtractorError('breaking out of outer loop')

2377

last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)

2378

yield {

2379

'url': last_segment_url,

2380

}

2381

if known_idx == last_seq:

2382

no_fragment_score += 5

2383

else:

2384

no_fragment_score = 0

2385

known_idx = last_seq

2386

except ExtractorError:

2387

continue

2388

2389

time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2390

2391

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the player JS URL found in *ytcfgs*, or None.

    ``PLAYER_JS_URL`` is looked up first, then the ``jsUrl`` entries under
    ``WEB_PLAYER_CONTEXT_CONFIGS``.  The result is joined onto
    https://www.youtube.com.  `webpage` is not used here.
    """
    js_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    return urljoin('https://www.youtube.com', js_url) if js_url else None

2398

2399

def _download_player_url(self, video_id, fatal=False):

2400

res = self._download_webpage(

2401

'https://www.youtube.com/iframe_api',

2402

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2403

if res:

2404

player_version = self._search_regex(

2405

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2406

if player_version:

2407

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2408

2409

def _signature_cache_id(self, example_sig):

2410

""" Return a string representation of a signature """

2411

return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))

2412

2413

@classmethod

2414

def _extract_player_info(cls, player_url):

2415

for player_re in cls._PLAYER_INFO_RE:

2416

id_m = re.search(player_re, player_url)

if id_m:

break

else:

raise ExtractorError('Cannot identify player %r' % player_url)

2421

return id_m.group('id')

2422

2423

def _load_player(self, video_id, player_url, fatal=True):

2424

player_id = self._extract_player_info(player_url)

2425

if player_id not in self._code_cache:

2426

code = self._download_webpage(

2427

player_url, video_id, fatal=fatal,

2428

note='Downloading player ' + player_id,

2429

errnote='Download of %s failed' % player_url)

2430

if code:

2431

self._code_cache[player_id] = code

2432

return self._code_cache.get(player_id)

2433

2434

def _extract_signature_function(self, video_id, player_url, example_sig):

2435

player_id = self._extract_player_info(player_url)

2436

2437

# Read from filesystem cache

2438

func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'

2439

assert os.path.basename(func_id) == func_id

2440

2441

cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)

2442

if cache_spec is not None:

2443

return lambda s: ''.join(s[i] for i in cache_spec)

2444

2445

code = self._load_player(video_id, player_url)

2446

if code:

2447

res = self._parse_sig_js(code)

2448

2449

test_string = ''.join(map(compat_chr, range(len(example_sig))))

2450

cache_res = res(test_string)

2451

cache_spec = [ord(c) for c in cache_res]

2452

2453

self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)

2454

return res

2455

2456

def _print_sig_code(self, func, example_sig):

2457

if not self.get_param('youtube_print_sig_code'):

2458

return

2459

2460

def gen_sig_code(idxs):

2461

def _genslice(start, end, step):

2462

starts = '' if start == 0 else str(start)

2463

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2464

steps = '' if step == 1 else (':%d' % step)

2465

return f's[{starts}{ends}{steps}]'

2466

2467

step = None

2468

# Quelch pyflakes warnings - start will be set when step is set

2469

start = '(Never used)'

2470

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2475

step = None

2476

continue

2477

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2487

2488

test_string = ''.join(map(compat_chr, range(len(example_sig))))

2489

cache_res = func(test_string)

2490

cache_spec = [ord(c) for c in cache_res]

2491

expr_code = ' + '.join(gen_sig_code(cache_spec))

2492

signature_id_tuple = '(%s)' % (

2493

', '.join(compat_str(len(p)) for p in example_sig.split('.')))

2494

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2495

' return %s\n') % (signature_id_tuple, expr_code)

2496

self.to_screen('Extracted signature function:\n' + code)

2497

2498

def _parse_sig_js(self, jscode):
    """Find the signature function in the player JS and return a callable for it.

    The function name is taken from the ``sig`` group of the first matching
    pattern below.  The returned callable takes the signature string and
    passes it to the interpreted JS function as its only argument.
    """
    # Literal parentheses must be written as \( and \); a bare `$` in their
    # place anchors at end-of-input and keeps the pattern from ever matching
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])

2521

2522

def _decrypt_signature(self, s, video_id, player_url):

2523

"""Turn the encrypted s field into a working signature"""

2524

2525

if player_url is None:

2526

raise ExtractorError('Cannot decrypt signature without player_url')

2527

2528

try:

2529

player_id = (player_url, self._signature_cache_id(s))

2530

if player_id not in self._player_cache:

2531

func = self._extract_signature_function(

2532

video_id, player_url, s

2533

)

2534

self._player_cache[player_id] = func

2535

func = self._player_cache[player_id]

2536

self._print_sig_code(func, s)

2537

return func(s)

2538

except Exception as e:

2539

raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)

2540

2541

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature

    Both the per-player n function and every decrypted value are kept in
    `self._player_cache`, so a repeated *s* is answered without running the
    function again.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        n_func = self._player_cache[func_key]
        self._player_cache[value_key] = n_func(s)
        self.write_debug(f'Decrypted nsig {s} => {self._player_cache[value_key]}')
        return self._player_cache[value_key]
    except Exception as err:
        raise ExtractorError(traceback.format_exc(), cause=err, video_id=video_id)

2561

2562

def _extract_n_function_name(self, jscode):

2563

nfunc, idx = self._search_regex(

2564

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

2565

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

2566

if not idx:

2567

return nfunc

2568

return json.loads(js_to_json(self._search_regex(

2569

rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,

2570

f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2571

2572

def _extract_n_function(self, video_id, player_url):
    """Return a callable that runs the player's n function on one string.

    The function code is read from the 'youtube-nsig' cache when available.
    Otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The function object is rebuilt from its code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function

2589

2590

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):

2591

"""

2592

Extract signatureTimestamp (sts)

2593

Required to tell API what sig/player version is in use.

2594

"""

2595

sts = None

2596

if isinstance(ytcfg, dict):

2597

sts = int_or_none(ytcfg.get('STS'))

2598

2599

if not sts:

2600

# Attempt to extract from player

2601

if player_url is None:

2602

error_msg = 'Cannot extract signature timestamp without player_url.'

2603

if fatal:

2604

raise ExtractorError(error_msg)

2605

self.report_warning(error_msg)

2606

return

2607

code = self._load_player(video_id, player_url, fatal=fatal)

2608

if code:

2609

sts = int_or_none(self._search_regex(

2610

r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,

2611

'JS player signature timestamp', group='sts', fatal=fatal))

2612

return sts

2613

2614

def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL found in *player_responses*.

    A warning is reported and nothing is requested when no such URL is
    present.  The request itself is non-fatal.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    tracking_url = compat_urlparse.urlunparse(
        url_parts._replace(query=compat_urllib_parse_urlencode(query, True)))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2639

2640

@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in *webpage*.

    Returns a list with, in this order: embedded player URLs, lazyYT video
    ids and "YouTube Video Importer" video ids.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = [unescapeHTML(mobj.group('url')) for mobj in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazyyt_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(list(map(unescapeHTML, lazyyt_ids)))

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of all groups; the video id is the last one
    for groups in importer_matches:
        entries.append(groups[-1])

    return entries

@staticmethod
def _extract_url(webpage):
    """Return the first entry of `YoutubeIE._extract_urls(webpage)`, or None if there is none."""
    urls = YoutubeIE._extract_urls(webpage)
    if not urls:
        return None
    return urls[0]

2676

2677

@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of `_VALID_URL` (matched in verbose mode) for *url*.

    Raises ExtractorError when the URL does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2683

2684

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in *data*.

    Start times are read from ``timeRangeStartMillis`` (scaled by 1000) and
    titles from the chapter renderer's ``simpleText``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_of(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_of,
        chapter_title=title_of,
        duration=duration)

2698

2699

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists in the engagement panels.

    The panels are tried in order; the first one yielding a non-empty
    chapter list wins.  An empty list is returned when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2714

2715

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):

2716

chapters = []

2717

last_chapter = {'start_time': 0}

2718

for idx, chapter in enumerate(chapter_list or []):

2719

title = chapter_title(chapter)

2720

start_time = chapter_time(chapter)

2721

if start_time is None:

2722

continue

2723

last_chapter['end_time'] = start_time

2724

if start_time < last_chapter['start_time']:

2725

if idx == 1:

2726

chapters.pop()

2727

self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])

2728

else:

2729

self.report_warning(f'Invalid start time for chapter "{title}"')

2730

continue

2731

last_chapter = {'start_time': start_time, 'title': title}

2732

chapters.append(last_chapter)

2733

last_chapter['end_time'] = duration

2734

return chapters

2735

2736

def _extract_yt_initial_variable(self, webpage, regex, video_id, name):

2737

return self._parse_json(self._search_regex(

2738

(fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',

2739

regex), webpage, name, default='{}'), video_id, fatal=False)

2740

2741

def _extract_comment(self, comment_renderer, parent=None):

2742

comment_id = comment_renderer.get('commentId')

if not comment_id:

return

text = self._get_text(comment_renderer, 'contentText')

2747

2748

# note: timestamp is an estimate calculated from the current time and time_text

2749

timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')

2750

author = self._get_text(comment_renderer, 'authorText')

2751

author_id = try_get(comment_renderer,

2752

lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

2753

2754

votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],

2755

lambda x: x['likeCount']), compat_str)) or 0

2756

author_thumbnail = try_get(comment_renderer,

2757

lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

2758

2759

author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)

2760

is_favorited = 'creatorHeart' in (try_get(

2761

comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})

return {

'id': comment_id,

'text': text,

'timestamp': timestamp,

2766

'time_text': time_text,

2767

'like_count': votes,

2768

'is_favorited': is_favorited,

2769

'author': author,

2770

'author_id': author_id,

2771

'author_thumbnail': author_thumbnail,

2772

'author_is_uploader': author_is_uploader,

2773

'parent': parent or 'root'

2774

}

2775

2776

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Yield comment dicts for a comment section or for one reply thread.

    The generator pages through the 'next' API endpoint, starting from the
    continuation found in `root_continuation_data`. For every comment that
    carries a replies renderer it recurses into itself with `parent` set to
    that comment's id.

    @param root_continuation_data  Renderer holding the initial continuation
    @param ytcfg                   Config passed on when building API headers
    @param video_id                Video id, used for the forced continuation and warnings
    @param parent                  Id of the parent comment when fetching replies, else None
    @param tracker                 Dict of counters shared across recursive calls
    """

    def config_arg(name):
        # First value of the extractor argument, or '' when it is not given
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        # Returns the continuation for the requested sort order (falsy if none was found)
        found = None
        for item in contents:
            header = traverse_obj(item, 'commentsHeaderRenderer')
            estimated = self._get_count(header, 'countText', 'commentsCount')
            if estimated:
                tracker['est_total'] = estimated
                self.to_screen(f'Downloading ~{estimated} comments')

            sort_index = int(config_arg('comment_sort') != 'top')  # 1 = new, 0 = top
            menu_item = try_get(
                header,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][sort_index],
                dict) or {}
            endpoint = menu_item.get('serviceEndpoint') or {}

            found = self._extract_continuation_ep_data(endpoint) or self._extract_continuation(menu_item)
            if found:
                label = str_or_none(menu_item.get('title')) or (
                    'top comments' if sort_index == 0 else 'newest first')
                self.to_screen('Sorting comments by %s' % label.lower())
                break
        return found

    def extract_thread(contents):
        if not parent:
            tracker['current_page_thread'] = 0
        counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consumer that the parent limit was reached
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            replies = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            replies_left = max(0, max_replies - tracker['total_reply_comments'])
            yield from itertools.islice(replies, min(max_replies_per_thread, replies_left))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # max_comments arg: total, parents, replies, replies-per-thread; missing values mean "no limit".
    # The total (first value) is not used here.
    limits = [
        int_or_none(value, default=sys.maxsize)
        for value in self._configuration_arg('max_comments', ) + [''] * 4]
    max_parents, max_replies, max_replies_per_thread = limits[1:4]

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    page_num = 0
    while continuation:
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        progress = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num:
            note = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, progress)
        elif is_first_continuation:
            note = 'Downloading comment section API JSON'
        else:
            note = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], progress)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note,
            check_get_keys=None if is_forced_continuation else 'onResponseReceivedEndpoints')
        is_forced_continuation = False
        sections = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for section in sections:
            items = traverse_obj(
                section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first response only carries the header (count + sort menu)
                continuation = extract_header(items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': items})
            if continuation:
                break

        page_num += 1

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

2921

2922

@staticmethod

2923

def _generate_comment_continuation(video_id):

2924

"""

2925

Generates initial comment section continuation token from given video id

2926

"""

2927

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

2928

return base64.b64encode(token.encode()).decode()

2929

2930

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments of the first item section
    identified as 'comment-item-section', capped at the first value of the
    'max_comments' extractor argument (no cap when that value is not a number).
    """

    def iter_comments(data):
        section = None
        for candidate in traverse_obj(data, (..., 'itemSectionRenderer'), default={}):
            if candidate.get('sectionIdentifier') == 'comment-item-section':
                section = candidate
                break
        yield from self._comment_entries(section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(contents), 0, limit)

2940

2941

@staticmethod

2942

def _get_checkok_params():

2943

return {'contentCheckOk': True, 'racyCheckOk': True}

2944

2945

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback context part of a player API query.

    @param sts  Signature timestamp; only included when it is not None
    @returns    Dict with 'playbackContext' plus the check-ok parameters
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback,
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context

@staticmethod
def _is_agegated(player_response):
    """Tell whether a player response indicates an age-gated video.

    A response counts as age-gated when its playability status carries a
    'desktopLegacyAgeGateReason', or when its 'status' or 'reason' contains
    one of the known age-gate markers. The markers are matched
    case-insensitively, since they are written in lower case here while
    statuses are upper-case constants.

    @param player_response  Player response dict (may be None)
    @returns                True if age-gated, else False
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Only strings can be lower-cased and searched; anything else is ignored
    texts = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in text for expected in AGE_GATE_REASONS for text in texts)

2970

2971

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of a player response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

2974

2975

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the player API JSON for one client.

    @param client        Client name; also used as default client for headers and request
    @param video_id      Id of the video to query
    @param master_ytcfg  Fallback config for session index, sync id and signature timestamp
    @param player_ytcfg  Config of the client being queried
    @param player_url    Player URL; the signature timestamp is only extracted when it is set
    @param initial_pr    Initial player response, used for the account sync id
    @returns             The API response, or None if it is empty
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB',  # enable stories
        **self._generate_player_context(sts),
    }
    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client, note=note)
    return response or None

2994

2995

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the 'player_client' extractor argument into a list of client names.

    'default' expands to the default clients and 'all' to every client whose
    name does not start with '_', ordered by descending priority. Unsupported
    names are skipped with a warning. For music URLs the '<client>_music'
    variant of every requested client is added when such a client exists.

    @param url            URL being extracted, checked with self.is_music_url
    @param smuggled_data  Dict that may carry the 'is_music_url' flag
    @returns              Requested clients with duplicates removed, order preserved
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy, so that the additions below never touch the default list
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending a list from a generator that
        # iterates the same list would also visit the freshly added names
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3018

3019

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for the given clients.

    Clients are processed in the given order. Depending on a response, extra
    clients (creator / embedded variants) may be queued and are processed
    right after the client that triggered them.

    @param clients       List of client names to query
    @param video_id      Id of the video
    @param webpage       Watch page contents, or a falsy value if not downloaded
    @param master_ytcfg  Config extracted from the webpage (or the default one)
    @returns             Tuple (list of player responses, player URL or None)
    @raises              The last ExtractorError if no response could be obtained
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    seen_clients = set(clients)
    # Reversed so that list.pop() hands out the clients in their original order
    pending = clients[::-1]
    responses = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for name in client_names:
            actual_client = _split_innertube_client(name)[0]
            if actual_client in INNERTUBE_CLIENTS and actual_client not in seen_clients:
                pending.append(name)
                seen_clients.add(actual_client)
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        responses.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client, base_client, variant = _split_innertube_client(pending.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; older ones are reported as warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            responses.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not responses:
            raise last_error
        self.report_warning(last_error)
    return responses, player_url

3094

3095

def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Yield format dicts from the streaming data of all player responses.

    First the directly listed 'formats' / 'adaptiveFormats' are yielded
    (signature and 'n' parameter are decrypted when needed), then the formats
    of the HLS and DASH manifests unless they are skipped.

    @param streaming_data  List of 'streamingData' dicts
    @param video_id        Id of the video
    @param player_url      Player URL used for decryption; ciphered formats are dropped without it
    @param is_live         Whether the video is live
    @param duration        Video duration, used to detect damaged formats (may be None)
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit None value
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        if audio_track.get('audioIsDefault'):
            language_preference = 10
        elif 'descriptive' in (audio_track.get('displayName') or '').lower():
            language_preference = -10
        else:
            language_preference = -1
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality is None when the format carries no quality information at all
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Copy, so that appending below cannot alter the list owned by the config accessor
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        # Returns False for an itag that was already seen with this protocol;
        # otherwise assigns format id and quality and returns True
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):

3263

spec = get_first(

3264

player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]

3265

base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))

if not base_url:

return

L = len(spec) - 1

for i, args in enumerate(spec):

3270

args = args.split('#')

3271

counts = list(map(int_or_none, args[:5]))

3272

if len(args) != 8 or not all(counts):

3273

self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')

3274

continue

3275

width, height, frame_count, cols, rows = counts

3276

N, sigh = args[6:]

3277

3278

url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'

3279

fragment_count = frame_count / (cols * rows)

3280

fragment_duration = duration / fragment_count

3281

yield {

3282

'format_id': f'sb{i}',

3283

'format_note': 'storyboard',

'ext': 'mhtml',

'protocol': 'mhtml',

'acodec': 'none',

'vcodec': 'none',

'url': url,

'width': width,

'height': height,

'fragments': [{

'url': url.replace('$M', str(j)),

3293

'duration': min(fragment_duration, duration - (j * fragment_duration)),

3294

} for j in range(math.ceil(fragment_count))],

3295

}

3296

3297

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    @param url            Original URL, used to pick the clients to request
    @param smuggled_data  Data smuggled in the URL, used to pick the clients
    @param video_id       Id of the video
    @param webpage_url    Watch page URL without the extra query parameters
    @returns              Tuple (webpage, master_ytcfg, player_responses, player_url)
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None if skip_webpage else self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    # Without a usable config from the webpage, the default one is used
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3310

3311

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Gather the live status, the streaming data and the formats of a video.

    @returns  Tuple (live_broadcast_details, is_live, streaming_data, formats)
    """
    broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    live_now = get_first(video_details, 'isLive')
    if live_now is None:
        # Fall back to the live broadcast details when the video details do not say
        live_now = get_first(broadcast_details, 'isLiveNow')

    all_streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    format_iter = self._extract_formats(all_streaming_data, video_id, player_url, live_now, duration)
    found_formats = list(format_iter)

    return broadcast_details, live_now, all_streaming_data, found_formats

3321

3322

def _real_extract(self, url):

3323

url, smuggled_data = unsmuggle_url(url, {})

3324

video_id = self._match_id(url)

3325

3326

base_url = self.http_scheme() + '//www.youtube.com/'

3327

webpage_url = base_url + 'watch?v=' + video_id

3328

3329

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3330

3331

playability_statuses = traverse_obj(

3332

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3333

3334

trailer_video_id = get_first(

3335

playability_statuses,

3336

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3337

expected_type=str)

3338

if trailer_video_id:

3339

return self.url_result(

3340

trailer_video_id, self.ie_key(), trailer_video_id)

3341

3342

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3343

if webpage else (lambda x: None))

3344

3345

video_details = traverse_obj(

3346

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3347

microformats = traverse_obj(

3348

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3349

expected_type=dict, default=[])

3350

video_title = (

3351

get_first(video_details, 'title')

3352

or self._get_text(microformats, (..., 'title'))

3353

or search_meta(['og:title', 'twitter:title', 'title']))

3354

video_description = get_first(video_details, 'shortDescription')

3355

3356

multifeed_metadata_list = get_first(

3357

player_responses,

3358

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3359

expected_type=str)

3360

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3361

if self.get_param('noplaylist'):

3362

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3367

# Unquote should take place before split on comma (,) since textual

3368

# fields may contain comma as well (see

3369

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3370

feed_data = compat_parse_qs(

3371

compat_urllib_parse_unquote_plus(feed))

3372

3373

def feed_entry(name):

3374

return try_get(

3375

feed_data, lambda x: x[name][0], compat_str)

3376

3377

feed_id = feed_entry('id')

3378

if not feed_id:

3379

continue

3380

feed_title = feed_entry('title')

3381

title = video_title

3382

if feed_title:

3383

title += ' (%s)' % feed_title

3384

entries.append({

3385

'_type': 'url_transparent',

3386

'ie_key': 'Youtube',

3387

'url': smuggle_url(

3388

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3389

{'force_singlefeed': True}),

3390

'title': title,

3391

})

3392

feed_ids.append(feed_id)

3393

self.to_screen(

3394

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3395

% (', '.join(feed_ids), video_id))

3396

return self.playlist_result(

3397

entries, video_id, video_title, video_description)

3398

3399

duration = int_or_none(

3400

get_first(video_details, 'lengthSeconds')

3401

or get_first(microformats, 'lengthSeconds')

3402

or parse_duration(search_meta('duration'))) or None

3403

3404

live_broadcast_details, is_live, streaming_data, formats = self._list_formats(

3405

video_id, microformats, video_details, player_responses, player_url, duration)

3406

3407

if not formats:

3408

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3409

self.report_drm(video_id)

3410

pemr = get_first(

3411

playability_statuses,

3412

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3413

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3414

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3415

if subreason:

3416

if subreason == 'The uploader has not made this video available in your country.':

3417

countries = get_first(microformats, 'availableCountries')

3418

if not countries:

3419

regions_allowed = search_meta('regionsAllowed')

3420

countries = regions_allowed.split(',') if regions_allowed else None

3421

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3422

reason += f'. {subreason}'

3423

if reason:

3424

self.raise_no_formats(reason, expected=True)

3425

3426

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3427

if not keywords and webpage:

3428

keywords = [

3429

unescapeHTML(m.group('content'))

3430

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3431

for keyword in keywords:

3432

if keyword.startswith('yt:stretch='):

3433

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3434

if mobj:

3435

# NB: float is intentional for forcing float division

3436

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3441

f['stretched_ratio'] = ratio

3442

break

3443

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3444

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3445

if thumbnail_url:

3446

thumbnails.append({

3447

'url': thumbnail_url,

3448

})

3449

original_thumbnails = thumbnails.copy()

3450

3451

# The best resolution thumbnails sometimes does not appear in the webpage

3452

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3453

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3454

thumbnail_names = [

3455

# While the *1,*2,*3 thumbnails are just below their correspnding "*default" variants

3456

# in resolution, these are not the custom thumbnail. So de-prioritize them

3457

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3458

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3459

]

3460

n_thumbnail_names = len(thumbnail_names)

3461

thumbnails.extend({

3462

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3463

video_id=video_id, name=name, ext=ext,

3464

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3465

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3466

for thumb in thumbnails:

3467

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3468

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3469

self._remove_duplicate_formats(thumbnails)

3470

self._downloader._sort_thumbnails(original_thumbnails)

3471

3472

category = get_first(microformats, 'category') or search_meta('genre')

3473

channel_id = str_or_none(

3474

get_first(video_details, 'channelId')

3475

or get_first(microformats, 'externalChannelId')

3476

or search_meta('channelId'))

3477

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3478

3479

live_content = get_first(video_details, 'isLiveContent')

3480

is_upcoming = get_first(video_details, 'isUpcoming')

3481

if is_live is None:

3482

if is_upcoming or live_content is False:

3483

is_live = False

3484

if is_upcoming is None and (live_content or is_live):

3485

is_upcoming = False

3486

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3487

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3488

if not duration and live_end_time and live_start_time:

3489

duration = live_end_time - live_start_time

3490

3491

if is_live and self.get_param('live_from_start'):

3492

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3493

3494

formats.extend(self._extract_storyboard(player_responses, duration))

3495

3496

# Source is given priority since formats that throttle are given lower source_preference

3497

# When throttling issue is fully fixed, remove this

3498

self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3503

'formats': formats,

3504

'thumbnails': thumbnails,

3505

# The best thumbnail that we are sure exists. Prevents unnecessary

3506

# URL checking if user don't care about getting the best possible thumbnail

3507

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3508

'description': video_description,

3509

'uploader': get_first(video_details, 'author'),

3510

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3511

'uploader_url': owner_profile_url,

3512

'channel_id': channel_id,

3513

'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),

3514

'duration': duration,

3515

'view_count': int_or_none(

3516

get_first((video_details, microformats), (..., 'viewCount'))

3517

or search_meta('interactionCount')),

3518

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3519

'age_limit': 18 if (

3520

get_first(microformats, 'isFamilySafe') is False

3521

or search_meta('isFamilyFriendly') == 'false'

3522

or search_meta('og:restrictions:age') == '18+') else 0,

3523

'webpage_url': webpage_url,

3524

'categories': [category] if category else None,

3525

'tags': keywords,

3526

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3527

'is_live': is_live,

3528

'was_live': (False if is_live or is_upcoming or live_content is False

3529

else None if is_live is None or is_upcoming is None

3530

else live_content),

3531

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3532

'release_timestamp': live_start_time,

3533

}

3534

3535

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3536

if pctr:

3537

def get_lang_code(track):

3538

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3539

or track.get('languageCode'))

3540

3541

# Converted into dicts to remove duplicates

3542

captions = {

3543

get_lang_code(sub): sub

3544

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3545

translation_languages = {

3546

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3547

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3548

3549

def process_language(container, base_url, lang_code, sub_name, query):

3550

lang_subs = container.setdefault(lang_code, [])

3551

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

subtitles, automatic_captions = {}, {}

3562

for lang_code, caption_track in captions.items():

3563

base_url = caption_track.get('baseUrl')

3564

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3565

if not base_url:

3566

continue

3567

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3568

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3573

if not caption_track.get('isTranslatable'):

3574

continue

3575

for trans_code, trans_name in translation_languages.items():

3576

if not trans_code:

3577

continue

3578

orig_trans_code = trans_code

3579

if caption_track.get('kind') != 'asr':

3580

if 'translated_subs' in self._configuration_arg('skip'):

3581

continue

3582

trans_code += f'-{lang_code}'

3583

trans_name += format_field(lang_name, template=' from %s')

3584

# Add an "-orig" label to the original language so that it can be distinguished.

3585

# The subs are returned without "-orig" as well for compatibility

3586

if lang_code == f'a-{orig_trans_code}':

3587

process_language(

3588

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3589

# Setting tlang=lang returns damaged subtitles.

3590

process_language(automatic_captions, base_url, trans_code, trans_name,

3591

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3592

info['automatic_captions'] = automatic_captions

3593

info['subtitles'] = subtitles

3594

3595

parsed_url = compat_urllib_parse_urlparse(url)

3596

for component in [parsed_url.fragment, parsed_url.query]:

3597

query = compat_parse_qs(component)

3598

for k, v in query.items():

3599

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3600

d_k += '_time'

3601

if d_k not in info and k in s_ks:

3602

info[d_k] = parse_duration(query[k][0])

3603

3604

# Youtube Music Auto-generated description

3605

if video_description:

3606

mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)

3607

if mobj:

3608

release_year = mobj.group('release_year')

3609

release_date = mobj.group('release_date')

3610

if release_date:

3611

release_date = release_date.replace('-', '')

3612

if not release_year:

3613

release_year = release_date[:4]

3614

info.update({

3615

'album': mobj.group('album'.strip()),

3616

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3617

'track': mobj.group('track').strip(),

3618

'release_date': release_date,

3619

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self._extract_yt_initial_variable(

3625

webpage, self._YT_INITIAL_DATA_RE, video_id,

3626

'yt initial data')

3627

if not initial_data:

3628

query = {'videoId': video_id}

3629

query.update(self._get_checkok_params())

3630

initial_data = self._extract_response(

3631

item_id=video_id, ep='next', fatal=False,

3632

ytcfg=master_ytcfg, query=query,

3633

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3634

note='Downloading initial data API JSON')

3635

3636

try: # This will error if there is no livechat

3637

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3638

except (KeyError, IndexError, TypeError):

3639

pass

3640

else:

3641

info.setdefault('subtitles', {})['live_chat'] = [{

3642

'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies

3643

'video_id': video_id,

3644

'ext': 'json',

3645

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3651

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3652

or None)

3653

3654

contents = traverse_obj(

3655

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3656

expected_type=list, default=[])

3657

3658

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3659

if vpir:

3660

stl = vpir.get('superTitleLink')

3661

if stl:

3662

stl = self._get_text(stl)

3663

if try_get(

3664

vpir,

3665

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3666

info['location'] = stl

3667

else:

3668

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

3669

if mobj:

3670

info.update({

3671

'series': mobj.group(1),

3672

'season_number': int(mobj.group(2)),

3673

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3678

list) or []):

3679

tbr = tlb.get('toggleButtonRenderer') or {}

3680

for getter, regex in [(

3681

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3682

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3683

lambda x: x['accessibility'],

3684

lambda x: x['accessibilityData']['accessibilityData'],

3685

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3686

label = (try_get(tbr, getter, dict) or {}).get('label')

3687

if label:

3688

mobj = re.match(regex, label)

3689

if mobj:

3690

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3691

break

3692

sbr_tooltip = try_get(

3693

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3694

if sbr_tooltip:

3695

like_count, dislike_count = sbr_tooltip.split(' / ')

3696

info.update({

3697

'like_count': str_to_int(like_count),

3698

'dislike_count': str_to_int(dislike_count),

3699

})

3700

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3701

if vsir:

3702

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3703

info.update({

3704

'channel': self._get_text(vor, 'title'),

3705

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3710

list) or []

3711

multiple_songs = False

3712

for row in rows:

3713

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3714

multiple_songs = True

3715

break

3716

for row in rows:

3717

mrr = row.get('metadataRowRenderer') or {}

3718

mrr_title = mrr.get('title')

3719

if not mrr_title:

3720

continue

3721

mrr_title = self._get_text(mrr, 'title')

3722

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3723

if mrr_title == 'License':

3724

info['license'] = mrr_contents_text

3725

elif not multiple_songs:

3726

if mrr_title == 'Album':

3727

info['album'] = mrr_contents_text

3728

elif mrr_title == 'Artist':

3729

info['artist'] = mrr_contents_text

3730

elif mrr_title == 'Song':

3731

info['track'] = mrr_contents_text

3732

3733

fallbacks = {

3734

'channel': 'uploader',

3735

'channel_id': 'uploader_id',

3736

'channel_url': 'uploader_url',

3737

}

3738

3739

# The upload date for scheduled, live and past live streams / premieres in microformats

3740

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3741

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3742

upload_date = (

3743

unified_strdate(get_first(microformats, 'uploadDate'))

3744

or unified_strdate(search_meta('uploadDate')))

3745

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3746

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date

3747

info['upload_date'] = upload_date

3748

3749

for to, frm in fallbacks.items():

3750

if not info.get(to):

3751

info[to] = info.get(frm)

3752

3753

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3759

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3760

is_membersonly = None

3761

is_premium = None

3762

if initial_data and is_private is not None:

3763

is_membersonly = False

3764

is_premium = False

3765

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3766

badge_labels = set()

3767

for content in contents:

3768

if not isinstance(content, dict):

3769

continue

3770

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3771

for badge_label in badge_labels:

3772

if badge_label.lower() == 'members only':

3773

is_membersonly = True

3774

elif badge_label.lower() == 'premium':

3775

is_premium = True

3776

elif badge_label.lower() == 'unlisted':

3777

is_unlisted = True

3778

3779

info['availability'] = self._availability(

3780

is_private=is_private,

3781

needs_premium=is_premium,

3782

needs_subscription=is_membersonly,

3783

needs_auth=info['age_limit'] >= 18,

3784

is_unlisted=None if is_private is None else is_unlisted)

3785

3786

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3787

3788

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3794

3795

@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that forwards smuggled URL data to the wrapped method and its entries.

    The wrapped method is invoked as ``func(self, url, smuggled_data)`` with the
    unsmuggled URL. If any smuggled data is present and the returned info dict
    has entries, every entry URL is smuggled again with that same data.
    """

    def _resmuggle(items, payload):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in items:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, payload = unsmuggle_url(url, {})
        if self.is_music_url(url):
            payload['is_music_url'] = True
        result = func(self, url, payload)
        # The result is only inspected when there is something to pass through
        if payload and result.get('entries'):
            result['entries'] = _resmuggle(result['entries'], payload)
        return result

    return wrapper

def _extract_channel_id(self, webpage):
    """Return the channel id found in ``webpage``.

    The ``channelId`` meta tag is preferred. Otherwise the id is parsed out of
    the channel URL taken from one of several URL meta tags; neither of those
    two fallback lookups is given a ``default``.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must sit inside the group: with `([^/?#&])+` the group
    # is repeated and ends up holding only the final character of the id
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')

3827

3828

@staticmethod

3829

def _extract_basic_item_renderer(item):

3830

# Modified from _extract_grid_item_renderer

3831

known_basic_renderers = (

3832

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

3833

)

3834

for key, renderer in item.items():

3835

if not isinstance(renderer, dict):

3836

continue

3837

elif key in known_basic_renderers:

3838

return renderer

3839

elif key.startswith('grid') and key.endswith('Renderer'):

3840

return renderer

3841

3842

def _grid_entries(self, grid_renderer):

3843

for item in grid_renderer['items']:

3844

if not isinstance(item, dict):

3845

continue

3846

renderer = self._extract_basic_item_renderer(item)

3847

if not isinstance(renderer, dict):

3848

continue

3849

title = self._get_text(renderer, 'title')

3850

3851

# playlist

3852

playlist_id = renderer.get('playlistId')

3853

if playlist_id:

3854

yield self.url_result(

3855

'https://www.youtube.com/playlist?list=%s' % playlist_id,

3856

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

3861

if video_id:

3862

yield self._extract_video(renderer)

3863

continue

3864

# channel

3865

channel_id = renderer.get('channelId')

3866

if channel_id:

3867

yield self.url_result(

3868

'https://www.youtube.com/channel/%s' % channel_id,

3869

ie=YoutubeTabIE.ie_key(), video_title=title)

3870

continue

3871

# generic endpoint URL support

3872

ep_url = urljoin('https://www.youtube.com/', try_get(

3873

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

3874

compat_str))

3875

if ep_url:

3876

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

3877

if ie.suitable(ep_url):

3878

yield self.url_result(

3879

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

3880

break

3881

3882

def _music_reponsive_list_entry(self, renderer):
    """Return a music.youtube.com URL result for a list item renderer.

    Checked in order: the item's own video id, a watch endpoint playlist
    (pointing at a specific video when the endpoint names one), and a browse
    endpoint. Returns None if none of these is present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    watch_endpoint_path = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, watch_endpoint_path + ('playlistId',))
    if playlist_id:
        video_id = traverse_obj(renderer, watch_endpoint_path + ('videoId',))
        if video_id:
            target = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None

3899

3900

def _shelf_entries_from_content(self, shelf_renderer):

3901

content = shelf_renderer.get('content')

3902

if not isinstance(content, dict):

3903

return

3904

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3905

if renderer:

3906

# TODO: add support for nested playlists so each shelf is processed

3907

# as separate playlist

3908

# TODO: this includes only first N items

3909

yield from self._grid_entries(renderer)

3910

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's own page, then the entries in its content.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3930

3931

def _playlist_entries(self, video_list_renderer):

3932

for content in video_list_renderer['contents']:

3933

if not isinstance(content, dict):

3934

continue

3935

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

3936

if not isinstance(renderer, dict):

3937

continue

3938

video_id = renderer.get('videoId')

3939

if not video_id:

3940

continue

3941

yield self._extract_video(renderer)

3942

3943

def _rich_entries(self, rich_grid_renderer):
    """Yield the extracted video wrapped in a rich item renderer, if it has a video id."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

3950

3951

def _video_entry(self, video_renderer):

3952

video_id = video_renderer.get('videoId')

3953

if video_id:

3954

return self._extract_video(video_renderer)

3955

3956

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for the page a hashtag tile links to, or None without a URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

3962

3963

def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a backstage post.

    In order: the attached video, the attached playlist, then every video
    link in the post text that YoutubeIE accepts and that is not the attached
    video itself.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        # The attached video was already yielded above
        if linked_video_id == video_id:
            continue
        yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)

3998

3999

def _post_thread_continuation_entries(self, post_thread_continuation):

4000

contents = post_thread_continuation.get('contents')

4001

if not isinstance(contents, list):

4002

return

4003

for content in contents:

4004

renderer = content.get('backstagePostThreadRenderer')

4005

if not isinstance(renderer, dict):

4006

continue

4007

yield from self._post_thread_entries(renderer)

4008

4009

r''' # unused

4010

def _rich_grid_entries(self, contents):

4011

for content in contents:

4012

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4013

if video_renderer:

4014

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    ``continuation_list`` is an output argument: it is reset to ``[None]`` in
    place and its single slot is then filled with the result of
    ``_extract_continuation`` for the handled renderer, falling back to the
    section renderer and finally to ``parent_renderer`` while it is still empty.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    entry_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for section in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(section, dict):
            continue
        is_renderer = traverse_obj(
            section, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            rich_item = section.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for isr_content in try_get(is_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(isr_content, dict):
                continue
            # Only the first known renderer of each item is processed
            for key, renderer in isr_content.items():
                if key not in entry_handlers:
                    continue
                for entry in entry_handlers[key](renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4067

4068

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield every entry of ``tab``, following continuations page by page.

    The tab's own content is extracted first. After that, one API response is
    requested per continuation and dispatched either on the keys of its
    ``continuationContents`` or on the first of its appended continuation
    items. Iteration ends when there is no continuation, no response, or no
    known renderer in the response.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # ``continuation_list`` is looked up at call time, so this always
        # fills whichever list the name is bound to at that moment
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Each value is (handler, key under which the continuation items are passed to it)
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            continuation_list = [None]
            yield from known_continuation_renderers[key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            handler, items_key = known_renderers[key]
            video_items_renderer = {items_key: continuation_items}
            continuation_list = [None]
            yield from handler(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4145

for tab in tabs:

4146

renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}

4147

if renderer.get('selected') is True:

return renderer

else:

if fatal:

raise ExtractorError('Unable to find selected tab')

4152

4153

def _extract_uploader(self, data):
    """Return the uploader fields found in the playlist's secondary sidebar info.

    The result may contain ``uploader``, ``uploader_id`` and ``uploader_url``;
    fields whose value is None are left out, and the dict is empty when the
    sidebar names no owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # Texts of the form "by X and N others" are reduced to X
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {name: value for name, value in fields.items() if value is not None}

4168

4169

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Return the playlist result for the selected tab of a tabbed page.

    Metadata comes from the channel metadata renderer, or from the playlist
    metadata renderer when there is no channel one, together with the primary
    sidebar. The entries are produced lazily by ``_entries``.
    """
    channel_name = channel_url = channel_id = None
    playlist_id = title = description = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # Derives the uncropped variant from the first thumbnail, if there is one
        if not thumbnails:
            return
        uncropped_url = _get_uncropped(thumbnails[0]['url'])
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly just updated) uploader fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4260

4261

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline playlist, fetching further pages via the ``next`` endpoint.

    Iteration ends when a page has no videos, when a page has nothing after
    the last video already yielded, or when a response carries no playlist.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page,
        # or from the beginning if that video does not reappear
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last item need not be a playlistPanelVideoRenderer, so the
        # endpoint may be absent; the query then relies on its fallbacks
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Without a playlist the next iteration would fail on playlist['contents']
            return

4292

4293

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return a result for a playlist embedded in a watch page.

    When the playlist links to a page of its own that differs from ``url``
    and is not known to be unviewable, extraction is delegated to that page.
    Otherwise the playlist is extracted inline.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    can_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not can_delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4315

4316

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public

    The labels are collected from the badges of the primary sidebar info
    renderer, plus the label of the selected entry of the privacy dropdown
    (if there is one).

    @param data: response
    @returns: whatever self._availability gives for the detected
              private/unlisted flags (each is None when not determined)
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_items = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_items:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            # Only stop once a selected entry actually yielded a label
            labels.add(selected_label.lower())
            break

    private = unlisted = None
    for label in labels:
        if label == 'public':
            private = unlisted = False
        elif label == 'private':
            private = True
        elif label == 'unlisted':
            unlisted = True
    return self._availability(private, False, False, False, unlisted)

4347

4348

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Find the first non-empty `info_renderer` entry among the playlist sidebar items.

    @param data: response
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type passed to try_get for the looked-up value
    @returns: the first truthy match, or None if there is none
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Re-request the playlist so that unavailable videos are included.

    The browse id and params are taken from the 'show unavailable videos'
    menu entry of the primary sidebar info renderer. If that entry is not
    found (or lacks a value), the fallbacks 'VL<item_id>' and 'wgYCCAA=' are
    used instead.

    @returns: the (non-fatal) API response, or None when the sidebar
              renderer could not be found
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_renderer = entry.get('menuNavigationItemRenderer')
        label = try_get(
            nav_renderer, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': endpoint.get('params') or 'wgYCCAA=',
        'browseId': endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4392

4393

@property
def skip_webpage(self):
    """Whether 'webpage' is among the values of YoutubeTabIE's `skip` extractor argument"""
    skipped_parts = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_parts

4396

4397

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying when needed.

    Up to `extractor_retries` (default 3) retries are made when the download
    fails with a network error (other than HTTP 403/429) or when the initial
    data lacks all of 'contents', 'currentVideoEndpoint' and
    'onResponseReceivedActions'.

    @param fatal: if True, errors are raised; otherwise they are reported as
                  warnings and whatever was obtained so far is returned
    @returns: (webpage, data) tuple; either may be None
    """
    retries = self.get_param('extractor_retries', 3)
    # NOTE: kept as a manual counter; `retries` is not guaranteed here to be an int usable with range()
    count = -1
    webpage = data = last_error = None
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            retry_suffix = ' (retry #%d)' % count if count else ''
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (retry_suffix,))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            # Network errors are worth retrying, except HTTP 403 and 429
            retriable = isinstance(cause, network_exceptions) and not (
                isinstance(cause, compat_HTTPError) and cause.code in (403, 429))
            if retriable:
                last_error = error_to_compat_str(cause or err.msg)
                if count < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            break
        else:
            try:
                self._extract_and_report_alerts(data)
            except ExtractorError as err:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(err))
                break

            # Any one of these keys means the data is usable
            if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
                break

            last_error = 'Incomplete yt initial data received'
            if count >= retries:
                # Out of retries
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                break

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when `ytcfg` is available or the session is not
    authenticated. Otherwise raises ExtractorError if `fatal` is set and
    'authcheck' is not in YoutubeTabIE's `skip` extractor argument; in all
    remaining cases a one-time warning is reported.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    check_enabled = 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if check_enabled and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

4454

4455

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the data (and ytcfg) for `url`.

    Unless the webpage is skipped, the data is first taken from the webpage;
    `ytcfg` is then filled in from the webpage if it was not given. If no
    data was obtained that way, the tab endpoint API is used instead.

    @param fatal: whether a redirect to the home page, a failed auth check
                  or a failed API extraction should raise
    @param webpage_fatal: `fatal` value used for the webpage extraction only
    @returns: (data, ytcfg) tuple
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4474

4475

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API and fetch the resolved endpoint.

    A resolved 'browseEndpoint' is requested from 'browse', a 'watchEndpoint'
    from 'next' (checked in that order).

    @returns: the API response of the resolved endpoint; None (after a
              warning) if nothing could be resolved and `fatal` is False
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query with its contents)
    endpoint_map = (('browseEndpoint', 'browse'), ('watchEndpoint', 'next'))
    for endpoint_key, api_ep in endpoint_map:
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

4492

4493

# Value sent as 'params' in search requests when _search_results is called without explicit params
_SEARCH_PARAMS = None

4494

4495

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Generate the entries of a search, page by page.

    @param query: the search query
    @param params: value for the 'params' field of the request; defaults to
                   self._SEARCH_PARAMS. Not sent if falsy
    @param default_client: client passed to the ytcfg download, the header
                           generation and the API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request_body = {'query': query}
    if params:
        request_body['params'] = params

    # Alternative locations of the results within a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Unique top-level keys of the paths above
    check_get_keys = tuple(set(path[0] for path in content_keys))
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-item list: passed to _extract_entries, then read back for the next page's continuation
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        request_body.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=request_body,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        page_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            # No continuation means this was the last page
            break

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4530

IE_DESC = 'YouTube Tabs'

4531

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4540

(?P<not_channel>

4541

feed/|hashtag/|

4542

(?:playlist|watch)\?.*?\blist=

4543

)|

4544

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4549

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4550

}

4551

IE_NAME = 'youtube:tab'

4552

4553

_TESTS = [{

4554

'note': 'playlists, multipage',

4555

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4556

'playlist_mincount': 94,

4557

'info_dict': {

4558

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4559

'title': 'Igor Kleiner - Playlists',

4560

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4561

'uploader': 'Igor Kleiner',

4562

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4563

'channel': 'Igor Kleiner',

4564

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4565

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4566

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4567

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4568

'channel_follower_count': int

4569

},

4570

}, {

4571

'note': 'playlists, multipage, different order',

4572

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4573

'playlist_mincount': 94,

4574

'info_dict': {

4575

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4576

'title': 'Igor Kleiner - Playlists',

4577

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4578

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4579

'uploader': 'Igor Kleiner',

4580

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4581

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4582

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4583

'channel': 'Igor Kleiner',

4584

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4585

'channel_follower_count': int

4586

},

4587

}, {

4588

'note': 'playlists, series',

4589

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4590

'playlist_mincount': 5,

4591

'info_dict': {

4592

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4593

'title': '3Blue1Brown - Playlists',

4594

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4595

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4596

'uploader': '3Blue1Brown',

4597

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4598

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4599

'channel': '3Blue1Brown',

4600

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4601

'tags': ['Mathematics'],

4602

'channel_follower_count': int

4603

},

4604

}, {

4605

'note': 'playlists, singlepage',

4606

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4607

'playlist_mincount': 4,

4608

'info_dict': {

4609

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4610

'title': 'ThirstForScience - Playlists',

4611

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4612

'uploader': 'ThirstForScience',

4613

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4614

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4615

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4616

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4617

'tags': 'count:13',

4618

'channel': 'ThirstForScience',

4619

'channel_follower_count': int

4620

}

4621

}, {

4622

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4623

'only_matching': True,

4624

}, {

4625

'note': 'basic, single video playlist',

4626

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4627

'info_dict': {

4628

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4629

'uploader': 'Sergey M.',

4630

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4631

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4636

'channel': 'Sergey M.',

4637

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4638

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4639

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4644

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4645

'info_dict': {

4646

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4647

'uploader': 'Sergey M.',

4648

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4649

'title': 'youtube-dl empty playlist',

4650

'tags': [],

4651

'channel': 'Sergey M.',

4652

'description': '',

4653

'modified_date': '20160902',

4654

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4655

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4656

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4662

'info_dict': {

4663

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4664

'title': 'lex will - Home',

4665

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4666

'uploader': 'lex will',

4667

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4668

'channel': 'lex will',

4669

'tags': ['bible', 'history', 'prophesy'],

4670

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4671

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4672

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4673

'channel_follower_count': int

4674

},

4675

'playlist_mincount': 2,

4676

}, {

4677

'note': 'Videos tab',

4678

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4679

'info_dict': {

4680

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4681

'title': 'lex will - Videos',

4682

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4683

'uploader': 'lex will',

4684

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4685

'tags': ['bible', 'history', 'prophesy'],

4686

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4687

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4688

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4689

'channel': 'lex will',

4690

'channel_follower_count': int

4691

},

4692

'playlist_mincount': 975,

4693

}, {

4694

'note': 'Videos tab, sorted by popular',

4695

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4696

'info_dict': {

4697

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4698

'title': 'lex will - Videos',

4699

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4700

'uploader': 'lex will',

4701

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4702

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4703

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4704

'channel': 'lex will',

4705

'tags': ['bible', 'history', 'prophesy'],

4706

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4707

'channel_follower_count': int

4708

},

4709

'playlist_mincount': 199,

4710

}, {

4711

'note': 'Playlists tab',

4712

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4713

'info_dict': {

4714

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4715

'title': 'lex will - Playlists',

4716

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4717

'uploader': 'lex will',

4718

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4719

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4720

'channel': 'lex will',

4721

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4722

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4723

'tags': ['bible', 'history', 'prophesy'],

4724

'channel_follower_count': int

4725

},

4726

'playlist_mincount': 17,

4727

}, {

4728

'note': 'Community tab',

4729

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4730

'info_dict': {

4731

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4732

'title': 'lex will - Community',

4733

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4734

'uploader': 'lex will',

4735

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4736

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4737

'channel': 'lex will',

4738

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4739

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4740

'tags': ['bible', 'history', 'prophesy'],

4741

'channel_follower_count': int

4742

},

4743

'playlist_mincount': 18,

4744

}, {

4745

'note': 'Channels tab',

4746

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4747

'info_dict': {

4748

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4749

'title': 'lex will - Channels',

4750

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4751

'uploader': 'lex will',

4752

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4753

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4754

'channel': 'lex will',

4755

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4756

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4757

'tags': ['bible', 'history', 'prophesy'],

4758

'channel_follower_count': int

4759

},

4760

'playlist_mincount': 12,

4761

}, {

4762

'note': 'Search tab',

4763

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4764

'playlist_mincount': 40,

4765

'info_dict': {

4766

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4767

'title': '3Blue1Brown - Search - linear algebra',

4768

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4769

'uploader': '3Blue1Brown',

4770

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4771

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4772

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4773

'tags': ['Mathematics'],

4774

'channel': '3Blue1Brown',

4775

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4776

'channel_follower_count': int

4777

},

4778

}, {

4779

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4780

'only_matching': True,

4781

}, {

4782

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4783

'only_matching': True,

4784

}, {

4785

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4786

'only_matching': True,

4787

}, {

4788

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4789

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4790

'info_dict': {

4791

'title': '29C3: Not my department',

4792

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4793

'uploader': 'Christiaan008',

4794

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4795

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4796

'tags': [],

4797

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4798

'view_count': int,

4799

'modified_date': '20150605',

4800

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4801

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4802

'channel': 'Christiaan008',

4803

},

4804

'playlist_count': 96,

4805

}, {

4806

'note': 'Large playlist',

4807

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4808

'info_dict': {

4809

'title': 'Uploads from Cauchemar',

4810

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4811

'uploader': 'Cauchemar',

4812

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4813

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4814

'tags': [],

4815

'modified_date': r're:\d{8}',

4816

'channel': 'Cauchemar',

4817

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4818

'view_count': int,

4819

'description': '',

4820

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4821

},

4822

'playlist_mincount': 1123,

4823

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4824

}, {

4825

'note': 'even larger playlist, 8832 videos',

4826

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4827

'only_matching': True,

4828

}, {

4829

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4830

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4831

'info_dict': {

4832

'title': 'Uploads from Interstellar Movie',

4833

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4834

'uploader': 'Interstellar Movie',

4835

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4836

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4837

'tags': [],

4838

'view_count': int,

4839

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4840

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4841

'channel': 'Interstellar Movie',

4842

'description': '',

4843

'modified_date': r're:\d{8}',

4844

},

4845

'playlist_mincount': 21,

4846

}, {

4847

'note': 'Playlist with "show unavailable videos" button',

4848

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4849

'info_dict': {

4850

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4851

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4852

'uploader': 'Phim Siêu Nhân Nhật Bản',

4853

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4854

'view_count': int,

4855

'channel': 'Phim Siêu Nhân Nhật Bản',

4856

'tags': [],

4857

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4858

'description': '',

4859

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4860

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4861

'modified_date': r're:\d{8}',

4862

},

4863

'playlist_mincount': 200,

4864

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4865

}, {

4866

'note': 'Playlist with unavailable videos in page 7',

4867

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4868

'info_dict': {

4869

'title': 'Uploads from BlankTV',

4870

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4871

'uploader': 'BlankTV',

4872

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4873

'channel': 'BlankTV',

4874

'channel_url': 'https://www.youtube.com/c/blanktv',

4875

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4876

'view_count': int,

4877

'tags': [],

4878

'uploader_url': 'https://www.youtube.com/c/blanktv',

4879

'modified_date': r're:\d{8}',

4880

'description': '',

4881

},

4882

'playlist_mincount': 1000,

4883

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4884

}, {

4885

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4886

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4887

'info_dict': {

4888

'title': 'Data Analysis with Dr Mike Pound',

4889

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4890

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4891

'uploader': 'Computerphile',

4892

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4893

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4894

'tags': [],

4895

'view_count': int,

4896

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4897

'channel_url': 'https://www.youtube.com/user/Computerphile',

4898

'channel': 'Computerphile',

4899

},

4900

'playlist_mincount': 11,

4901

}, {

4902

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4903

'only_matching': True,

4904

}, {

4905

'note': 'Playlist URL that does not actually serve a playlist',

4906

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4911

'uploader': 'STREEM',

4912

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4913

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4914

'upload_date': '20150526',

4915

'license': 'Standard YouTube License',

4916

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4917

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4924

},

4925

'skip': 'This video is not available.',

4926

'add_ie': [YoutubeIE.ie_key()],

4927

}, {

4928

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4929

'only_matching': True,

4930

}, {

4931

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

4932

'only_matching': True,

4933

}, {

4934

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

4935

'info_dict': {

4936

'id': 'GgL890LIznQ', # This will keep changing

4937

'ext': 'mp4',

4938

'title': str,

4939

'uploader': 'Sky News',

4940

'uploader_id': 'skynews',

4941

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

4942

'upload_date': r're:\d{8}',

4943

'description': str,

4944

'categories': ['News & Politics'],

4945

'tags': list,

4946

'like_count': int,

4947

'release_timestamp': 1642502819,

4948

'channel': 'Sky News',

4949

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

4950

'age_limit': 0,

4951

'view_count': int,

4952

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

4953

'playable_in_embed': True,

4954

'release_date': '20220118',

4955

'availability': 'public',

4956

'live_status': 'is_live',

4957

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

4958

'channel_follower_count': int

4959

},

4960

'params': {

4961

'skip_download': True,

4962

},

4963

'expected_warnings': ['Ignoring subtitle tracks found in '],

4964

}, {

4965

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

4970

'uploader': 'The Young Turks',

4971

'uploader_id': 'TheYoungTurks',

4972

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

4973

'upload_date': '20150715',

4974

'license': 'Standard YouTube License',

4975

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

4976

'categories': ['News & Politics'],

4977

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

4982

},

4983

'only_matching': True,

4984

}, {

4985

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

4986

'only_matching': True,

4987

}, {

4988

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

4989

'only_matching': True,

4990

}, {

4991

'note': 'A channel that is not live. Should raise error',

4992

'url': 'https://www.youtube.com/user/numberphile/live',

4993

'only_matching': True,

4994

}, {

4995

'url': 'https://www.youtube.com/feed/trending',

4996

'only_matching': True,

4997

}, {

4998

'url': 'https://www.youtube.com/feed/library',

4999

'only_matching': True,

5000

}, {

5001

'url': 'https://www.youtube.com/feed/history',

5002

'only_matching': True,

5003

}, {

5004

'url': 'https://www.youtube.com/feed/subscriptions',

5005

'only_matching': True,

5006

}, {

5007

'url': 'https://www.youtube.com/feed/watch_later',

5008

'only_matching': True,

5009

}, {

5010

'note': 'Recommended - redirects to home page.',

5011

'url': 'https://www.youtube.com/feed/recommended',

5012

'only_matching': True,

5013

}, {

5014

'note': 'inline playlist with not always working continuations',

5015

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5016

'only_matching': True,

5017

}, {

5018

'url': 'https://www.youtube.com/course',

5019

'only_matching': True,

5020

}, {

5021

'url': 'https://www.youtube.com/zsecurity',

5022

'only_matching': True,

5023

}, {

5024

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5025

'only_matching': True,

5026

}, {

5027

'url': 'https://www.youtube.com/TheYoungTurks/live',

5028

'only_matching': True,

5029

}, {

5030

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5037

}, {

5038

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5039

'only_matching': True,

5040

}, {

5041

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5042

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5043

'only_matching': True

5044

}, {

5045

'note': '/browse/ should redirect to /channel/',

5046

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5047

'only_matching': True

5048

}, {

5049

'note': 'VLPL, should redirect to playlist?list=PL...',

5050

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5051

'info_dict': {

5052

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5053

'uploader': 'NoCopyrightSounds',

5054

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5055

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5056

'title': 'NCS Releases',

5057

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5058

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5059

'modified_date': r're:\d{8}',

5060

'view_count': int,

5061

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5062

'tags': [],

5063

'channel': 'NoCopyrightSounds',

5064

},

5065

'playlist_mincount': 166,

5066

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5067

}, {

5068

'note': 'Topic, should redirect to playlist?list=UU...',

5069

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5070

'info_dict': {

5071

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5072

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5073

'title': 'Uploads from Royalty Free Music - Topic',

5074

'uploader': 'Royalty Free Music - Topic',

5075

'tags': [],

5076

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5077

'channel': 'Royalty Free Music - Topic',

5078

'view_count': int,

5079

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5080

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5081

'modified_date': r're:\d{8}',

5082

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5083

'description': '',

5084

},

5085

'expected_warnings': [

5086

'The URL does not have a videos tab',

5087

r'[Uu]navailable videos (are|will be) hidden',

5088

],

5089

'playlist_mincount': 101,

5090

}, {

5091

'note': 'Topic without a UU playlist',

5092

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5093

'info_dict': {

5094

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5095

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5096

'tags': [],

5097

},

5098

'expected_warnings': [

5099

'the playlist redirect gave error',

5100

],

5101

'playlist_mincount': 9,

5102

}, {

5103

'note': 'Youtube music Album',

5104

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5105

'info_dict': {

5106

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5107

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5112

'modified_date': r're:\d{8}',

5113

},

5114

'playlist_count': 50,

5115

}, {

5116

'note': 'unlisted single video playlist',

5117

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5118

'info_dict': {

5119

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5120

'uploader': 'colethedj',

5121

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5122

'title': 'yt-dlp unlisted playlist test',

5123

'availability': 'unlisted',

5124

'tags': [],

5125

'modified_date': '20211208',

5126

'channel': 'colethedj',

5127

'view_count': int,

5128

'description': '',

5129

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5130

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5131

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5136

'url': 'https://www.youtube.com/feed/recommended',

5137

'info_dict': {

5138

'id': 'recommended',

5139

'title': 'recommended',

5140

'tags': [],

5141

},

5142

'playlist_mincount': 50,

5143

'params': {

5144

'skip_download': True,

5145

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5146

},

5147

}, {

5148

'note': 'API Fallback: /videos tab, sorted by oldest first',

5149

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5150

'info_dict': {

5151

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5152

'title': 'Cody\'sLab - Videos',

5153

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5154

'uploader': 'Cody\'sLab',

5155

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5156

'channel': 'Cody\'sLab',

5157

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5158

'tags': [],

5159

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5160

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5161

'channel_follower_count': int

5162

},

5163

'playlist_mincount': 650,

5164

'params': {

5165

'skip_download': True,

5166

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5167

},

5168

}, {

5169

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5170

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5171

'info_dict': {

5172

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5173

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5174

'title': 'Uploads from Royalty Free Music - Topic',

5175

'uploader': 'Royalty Free Music - Topic',

5176

'modified_date': r're:\d{8}',

5177

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5178

'description': '',

5179

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5180

'tags': [],

5181

'channel': 'Royalty Free Music - Topic',

5182

'view_count': int,

5183

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5184

},

5185

'expected_warnings': [

5186

'does not have a videos tab',

5187

r'[Uu]navailable videos (are|will be) hidden',

5188

],

5189

'playlist_mincount': 101,

5190

'params': {

5191

'skip_download': True,

5192

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5193

},

5194

}, {

5195

'note': 'non-standard redirect to regional channel',

5196

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5197

'only_matching': True

5198

}, {

5199

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5200

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5201

'info_dict': {

5202

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5203

'modified_date': '20220407',

5204

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5205

'tags': [],

5206

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5207

'uploader': 'pukkandan',

5208

'availability': 'unlisted',

5209

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5210

'channel': 'pukkandan',

5211

'description': 'Test for collaborative playlist',

5212

'title': 'yt-dlp test - collaborative playlist',

5213

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5214

},

5215

'playlist_mincount': 2

}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle url.

    URLs that YoutubeIE reports as suitable are always declined;
    everything else is decided by the parent class's suitable().
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5221

5222

# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional "/<word>" path segment; the conditional group only tries
#          to match it when the "not_channel" group did NOT participate
#   post - whatever follows, up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5223

5224

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab/playlist style URL and dispatch to the matching extraction routine.

    The URL is first normalised to www.youtube.com and split with _URL_RE.
    Music "VL"/"MP"/browse ids, bare channel URLs, "watch" URLs without a
    video id and region redirects are rewritten or delegated before the page
    data is inspected.  The result comes from _extract_from_tabs,
    _extract_from_playlist or a url_result pointing at a single video.

    Raises ExtractorError when the page cannot be recognised, when an album
    cannot be resolved to a playlist, when the /live tab did not redirect to
    a video, or when an explicitly requested tab does not exist.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    follow_redirects = 'no-youtube-channel-redirect' not in compat_opts

    def get_mobj(url):
        # Unmatched groups are turned into '' so they can be joined/sliced without None checks
        groups = self._URL_RE.match(url).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    mobj = get_mobj(url)
    redirect_warning = None
    pre = mobj['pre']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab, post, is_channel = '', '', False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            album_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not album_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(album_url, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user actually asked for before a default tab is substituted
    original_tab_name = tab
    if is_channel and not tab and follow_redirects:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_path = (
        'onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
        'commandMetadata', 'webCommandMetadata', 'url')
    redirect_url = traverse_obj(data, redirect_path, get_all=False)
    if redirect_url and follow_redirects:
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        if follow_redirects:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name and requested_tab_name != selected_tab_name:
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab was explicitly requested by the user, so its absence is an error
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # data may have been replaced above, so the tabs are looked up again
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return self.url_result(
        f'https://www.youtube.com/watch?v={video_id}',
        ie=YoutubeIE.ie_key(), video_id=video_id)

5353

5354

5355

class YoutubePlaylistIE(InfoExtractor):
    """Accepts bare playlist ids and "...?list=<id>" URLs and hands them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that YoutubeTabIE accepts or that carry a non-empty "v" parameter."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): the function-scope import is kept from the original;
        # presumably it is intentional, so do not hoist it without checking.
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rebuild the URL as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Checked against the incoming URL, before it is rewritten below
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string when there is one; otherwise only the id is known
        query = parse_qs(url) or {'list': playlist_id}
        tab_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            tab_url = smuggle_url(tab_url, {'is_music_url': True})
        return self.url_result(tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5464

5465

5466

class YoutubeYtBeIE(InfoExtractor):
    """Handles youtu.be short links that also carry a "list=" playlist parameter."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL (video + playlist) for YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5513

5514

5515

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Maps /embed/live_stream?channel=<id> URLs onto the channel's /live tab URL."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's /live URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5528

5529

5530

class YoutubeYtUserIE(InfoExtractor):
    """Expands the "ytuser:<name>" shorthand into the user's /videos URL."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the user's /videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5544

5545

5546

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Expands the ":ytfav" keyword family into the "LL" playlist URL."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no parameters."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

5563

5564

5565

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Lists the entries of the notification menu (":ytnotif" keyword)."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification in an API response.

        continuation_list is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last
        'continuationItemRenderer' found among the items, if any.
        """
        notification_list = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for item in notification_list:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            continuation = item.get('continuationItemRenderer')
            if continuation:
                continuation_list[0] = continuation

    def _extract_notification_renderer(self, notification):
        """Convert one notification renderer into a 'url' result dict.

        Returns None when the notification has no video id and no
        channel id + post id pair to build a URL from.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            # canonicalBaseUrl may be missing; search an empty string rather than None
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str) or '',
                'post id', default=None)
            if not channel_id or not post_id:
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        # re.escape(None) raises TypeError, so fall back to a pattern without
        # the channel prefix when the channel name could not be extracted
        title_re = rf'{re.escape(channel)} [^:]+: (.+)' if channel else r'[^:]+: (.+)'
        title = self._search_regex(
            title_re, self._get_text(notification, 'shortMessage') or '',
            'video title', default=None)
        if title:
            title = title.replace('\xad', '')  # remove soft hyphens
        upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')
                       if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())
                       else None)
        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries page by page until a response carries no continuation."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            # The previous response (None on the first page) supplies the visitor data
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return a playlist whose entries are produced lazily from the notification menu."""
        display_id = 'notifications'
        ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)

5652

5653

5654

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the "ytsearch" key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the "ytsearchdate" key."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles youtube.com /results and /search URLs that carry a search query."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            'entries': [{
                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                'title': '#cats',
            }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named by "search_query"/"q", passing along the optional "sp" value."""
        qs = parse_qs(url)
        # "search_query" takes precedence over "q" when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)

5722

5723

5724

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles music.youtube.com search URLs, optionally narrowed to one result section."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (lower case) -> value sent as the search "params"
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run a music search; the section comes from "sp" or, failing that, the URL fragment.

        The playlist id/title is the query, suffixed with " - <section>" when
        a section is in effect.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Show the friendly section name when "sp" is a known value, else the raw value
            section = params
            for name, token in self._SECTIONS.items():
                if token == params:
                    section = name
                    break
        else:
            # Text between the first and second '#', or '' when there is no fragment
            fragment = url.split('#')[1] if '#' in url else ''
            section = compat_urllib_parse_unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)

5775

5776

5777

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed keyword extractors.

    Each subclass provides _FEED_NAME, which is used both for IE_NAME
    and for the /feed/<name> URL that extraction is delegated to.
    """

    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return f'youtube:{self._FEED_NAME}'

    def _real_initialize(self):
        """Run YoutubeBaseInfoExtractor's login check against this instance."""
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the feed URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5794

5795

5796

class YoutubeWatchLaterIE(InfoExtractor):
    """Expands the ":ytwatchlater" keyword into the "WL" playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no parameters."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5808

5809

5810

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for "recommended"; also matches the bare youtube.com home URL."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for "subscriptions" (":ytsubs" keyword family)."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for "history" (":ythis" / ":ythistory" keywords)."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeStoriesIE(InfoExtractor):
    """Expands "ytstories:UC<id>" into the matching "RLTD<id>" playlist URL."""

    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    # The "UC" prefix sits outside the id group, so _match_id returns the id without it
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the stories playlist URL."""
        playlist_id = f'RLTD{self._match_id(url)}'
        # Pass the extractor key string, as every other delegating extractor here does
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5864

5865

5866

class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that carry no video id and explain why.

    ``_VALID_URL`` only matches URLs that END right after ``watch?``
    (optionally followed by exactly one of the listed non-id parameters)
    or after ``attribution_link?a=...``. Such a URL is what remains when
    an unquoted ``&`` on the command line cuts the real URL short, so
    extraction never succeeds: ``_real_extract`` always raises.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose-mode pattern: whitespace and line breaks are ignored by the
    # regex engine, so the layout below is purely for readability.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    # Matching-only checks: one URL per truncated form the pattern accepts.
    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with a hint about quoting the URL in the shell.

        Raises:
            ExtractorError: unconditionally, created with ``expected=True``.
        """
        # The hint names yt-dlp (this project's executable) rather than
        # youtube-dl, so the suggested command is one the user can run.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(InfoExtractor):
    """Placeholder for ``youtube.com/clip/`` URLs.

    Emits a warning that clips are unsupported and passes the unchanged
    URL on to the extractor registered under the key ``'Generic'``.
    """
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Warn about missing clip support, then defer to 'Generic'."""
        unsupported_notice = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(unsupported_notice)
        return self.url_result(url, 'Generic')

5923

5924

5925

class YoutubeTruncatedIDIE(InfoExtractor):
    """Reject watch URLs whose ``v=`` value is only 1 to 10 characters long.

    The pattern is anchored at the end, so it matches only when nothing
    follows the short id. Extraction always raises, reporting the
    incomplete id and the URL.
    """
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always raise an ExtractorError naming the incomplete id.

        Raises:
            ExtractorError: unconditionally, created with ``expected=True``.
        """
        video_id = self._match_id(url)
        complaint = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(complaint, expected=True)