jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import hashlib
	6	import itertools
	7	import json
	8	import math
	9	import os.path
	10	import random
	11	import re
	12	import sys
	13	import threading
	14	import time
	15	import traceback
	16
	17	from .common import InfoExtractor, SearchInfoExtractor
	18	from ..compat import functools # isort: split
	19	from ..compat import (
	20	compat_chr,
	21	compat_HTTPError,
	22	compat_parse_qs,
	23	compat_str,
	24	compat_urllib_parse_unquote_plus,
	25	compat_urllib_parse_urlencode,
	26	compat_urllib_parse_urlparse,
	27	compat_urlparse,
	28	)
	29	from ..jsinterp import JSInterpreter
	30	from ..utils import (
	31	NO_DEFAULT,
	32	ExtractorError,
	33	bug_reports_message,
	34	classproperty,
	35	clean_html,
	36	datetime_from_str,
	37	dict_get,
	38	error_to_compat_str,
	39	float_or_none,
	40	format_field,
	41	get_first,
	42	int_or_none,
	43	is_html,
	44	join_nonempty,
	45	js_to_json,
	46	mimetype2ext,
	47	network_exceptions,
	48	orderedSet,
	49	parse_codecs,
	50	parse_count,
	51	parse_duration,
	52	parse_iso8601,
	53	parse_qs,
	54	qualities,
	55	remove_end,
	56	remove_start,
	57	smuggle_url,
	58	str_or_none,
	59	str_to_int,
	60	strftime_or_none,
	61	traverse_obj,
	62	try_get,
	63	unescapeHTML,
	64	unified_strdate,
	65	unified_timestamp,
	66	unsmuggle_url,
	67	update_url_query,
	68	url_or_none,
	69	urljoin,
	70	variadic,
	71	)
	72
	# any clients starting with _ cannot be explicitly requested by the user
	#
	# Per-client Innertube defaults, keyed by client id. Entries without
	# 'INNERTUBE_API_KEY', 'INNERTUBE_HOST' or 'REQUIRE_JS_PLAYER' are listed
	# here exactly as declared; nothing is derived inside this literal.
	INNERTUBE_CLIENTS = {
	    'web': {
	        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'WEB',
	                'clientVersion': '2.20211221.00.00',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
	    },
	    'web_embedded': {
	        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'WEB_EMBEDDED_PLAYER',
	                'clientVersion': '1.20211215.00.01',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
	    },
	    'web_music': {
	        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	        'INNERTUBE_HOST': 'music.youtube.com',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'WEB_REMIX',
	                'clientVersion': '1.20211213.00.00',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	    },
	    'web_creator': {
	        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'WEB_CREATOR',
	                'clientVersion': '1.20211220.02.00',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	    },
	    'android': {
	        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'ANDROID',
	                'clientVersion': '16.49',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'android_embedded': {
	        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'ANDROID_EMBEDDED_PLAYER',
	                'clientVersion': '16.49',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'android_music': {
	        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'ANDROID_MUSIC',
	                'clientVersion': '4.57',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'android_creator': {
	        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'ANDROID_CREATOR',
	                'clientVersion': '21.47',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
	    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	    'ios': {
	        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'IOS',
	                'clientVersion': '16.46',
	                'deviceModel': 'iPhone14,3',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'ios_embedded': {
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'IOS_MESSAGES_EXTENSION',
	                'clientVersion': '16.46',
	                'deviceModel': 'iPhone14,3',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'ios_music': {
	        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'IOS_MUSIC',
	                'clientVersion': '4.57',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'ios_creator': {
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'IOS_CREATOR',
	                'clientVersion': '21.47',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    # mweb has 'ultralow' formats
	    # See: https://github.com/yt-dlp/yt-dlp/pull/557
	    'mweb': {
	        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'MWEB',
	                'clientVersion': '2.20211221.01.00',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
	    },
	    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
	    'tv_embedded': {
	        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	                'clientVersion': '2.0',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
	    },
	}
	231
	232
	233	def _split_innertube_client(client_name):
	234	variant, *base = client_name.rsplit('.', 1)
	235	if base:
	236	return variant, base[0], variant
	237	base, *variant = client_name.split('_', 1)
	238	return client_name, base, variant[0] if variant else None
	239
	240
	def build_innertube_clients():
	    """Complete every entry of ``INNERTUBE_CLIENTS`` in place.

	    For each declared client this fills in the default API key, host,
	    ``REQUIRE_JS_PLAYER`` flag and ``hl`` language, and stores a numeric
	    ``priority``.  Clients without a variant suffix also get a derived
	    ``<client>_embedscreen`` copy.
	    """
	    third_party = {
	        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
	    }
	    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
	    rank = qualities(base_clients[::-1])

	    # Iterate over a snapshot: new '*_embedscreen' keys are added while looping
	    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
	        for key, fallback in (
	            ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
	            ('INNERTUBE_HOST', 'www.youtube.com'),
	            ('REQUIRE_JS_PLAYER', True),
	        ):
	            cfg.setdefault(key, fallback)
	        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

	        _, base_client, variant = _split_innertube_client(name)
	        cfg['priority'] = 10 * rank(base_client)

	        if variant == 'embedded':
	            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
	            cfg['priority'] -= 2
	        elif variant:
	            cfg['priority'] -= 3
	        else:
	            # Plain base client: derive its embed-screen sibling
	            screen_cfg = copy.deepcopy(cfg)
	            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
	            screen_cfg['priority'] -= 3
	            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg
	267
	268
	269	build_innertube_clients()
	270
	271
	272	class YoutubeBaseInfoExtractor(InfoExtractor):
	273	"""Provide base functions for Youtube extractors"""
	274
	275	_RESERVED_NAMES = (
	276	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	277	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	278	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	279	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	280
	281	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	282
	283	# _NETRC_MACHINE = 'youtube'
	284
	285	# If True it will raise an error if no login info is provided
	286	_LOGIN_REQUIRED = False
	287
	288	_INVIDIOUS_SITES = (
	289	# invidious-redirect websites
	290	r'(?:www\.)?redirect\.invidious\.io',
	291	r'(?:(?:www\|dev)\.)?invidio\.us',
	292	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	293	r'(?:www\.)?invidious\.pussthecat\.org',
	294	r'(?:www\.)?invidious\.zee\.li',
	295	r'(?:www\.)?invidious\.ethibox\.fr',
	296	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	297	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	298	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	299	# youtube-dl invidious instances list
	300	r'(?:(?:www\|no)\.)?invidiou\.sh',
	301	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	302	r'(?:www\.)?invidious\.kabi\.tk',
	303	r'(?:www\.)?invidious\.mastodon\.host',
	304	r'(?:www\.)?invidious\.zapashcanon\.fr',
	305	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	306	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	307	r'(?:www\.)?invidious\.himiko\.cloud',
	308	r'(?:www\.)?invidious\.reallyancient\.tech',
	309	r'(?:www\.)?invidious\.tube',
	310	r'(?:www\.)?invidiou\.site',
	311	r'(?:www\.)?invidious\.site',
	312	r'(?:www\.)?invidious\.xyz',
	313	r'(?:www\.)?invidious\.nixnet\.xyz',
	314	r'(?:www\.)?invidious\.048596\.xyz',
	315	r'(?:www\.)?invidious\.drycat\.fr',
	316	r'(?:www\.)?inv\.skyn3t\.in',
	317	r'(?:www\.)?tube\.poal\.co',
	318	r'(?:www\.)?tube\.connect\.cafe',
	319	r'(?:www\.)?vid\.wxzm\.sx',
	320	r'(?:www\.)?vid\.mint\.lgbt',
	321	r'(?:www\.)?vid\.puffyan\.us',
	322	r'(?:www\.)?yewtu\.be',
	323	r'(?:www\.)?yt\.elukerio\.org',
	324	r'(?:www\.)?yt\.lelux\.fi',
	325	r'(?:www\.)?invidious\.ggc-project\.de',
	326	r'(?:www\.)?yt\.maisputain\.ovh',
	327	r'(?:www\.)?ytprivate\.com',
	328	r'(?:www\.)?invidious\.13ad\.de',
	329	r'(?:www\.)?invidious\.toot\.koeln',
	330	r'(?:www\.)?invidious\.fdn\.fr',
	331	r'(?:www\.)?watch\.nettohikari\.com',
	332	r'(?:www\.)?invidious\.namazso\.eu',
	333	r'(?:www\.)?invidious\.silkky\.cloud',
	334	r'(?:www\.)?invidious\.exonip\.de',
	335	r'(?:www\.)?invidious\.riverside\.rocks',
	336	r'(?:www\.)?invidious\.blamefran\.net',
	337	r'(?:www\.)?invidious\.moomoo\.de',
	338	r'(?:www\.)?ytb\.trom\.tf',
	339	r'(?:www\.)?yt\.cyberhost\.uk',
	340	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	341	r'(?:www\.)?qklhadlycap4cnod\.onion',
	342	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	343	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	344	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	345	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	346	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	347	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	348	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	349	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	350	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	351	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	352	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	353	r'(?:www\.)?piped\.kavin\.rocks',
	354	r'(?:www\.)?piped\.silkky\.cloud',
	355	r'(?:www\.)?piped\.tokhmi\.xyz',
	356	r'(?:www\.)?piped\.moomoo\.me',
	357	r'(?:www\.)?il\.ax',
	358	r'(?:www\.)?piped\.syncpundit\.com',
	359	r'(?:www\.)?piped\.mha\.fi',
	360	r'(?:www\.)?piped\.mint\.lgbt',
	361	r'(?:www\.)?piped\.privacy\.com\.de',
	362	)
	363
	364	def _initialize_consent(self):
	365	cookies = self._get_cookies('https://www.youtube.com/')
	366	if cookies.get('__Secure-3PSID'):
	367	return
	368	consent_id = None
	369	consent = cookies.get('CONSENT')
	370	if consent:
	371	if 'YES' in consent.value:
	372	return
	373	consent_id = self._search_regex(
	374	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	375	if not consent_id:
	376	consent_id = random.randint(100, 999)
	377	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	378
	def _initialize_pref(self):
	    """Rewrite the ``PREF`` cookie so that ``hl=en`` and ``tz=UTC``.

	    Existing key/value pairs of a parseable ``PREF`` cookie are kept; a
	    ``ValueError`` while parsing only triggers a warning.
	    """
	    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
	    pref = {}
	    if pref_cookie:
	        try:
	            pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
	        except ValueError:
	            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	    pref['hl'] = 'en'
	    pref['tz'] = 'UTC'
	    self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
	390
	391	def _real_initialize(self):
	392	self._initialize_pref()
	393	self._initialize_consent()
	394	self._check_login_required()
	395
	396	def _check_login_required(self):
	397	if self._LOGIN_REQUIRED and not self._cookies_passed:
	398	self.raise_login_required('Login details are needed to download this content', method='cookies')
	399
	400	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	401	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	402
	def _get_default_ytcfg(self, client='web'):
	    """Return a deep copy of the ``INNERTUBE_CLIENTS`` entry for *client*.

	    Raises ``KeyError`` for an unknown client id.
	    """
	    template = INNERTUBE_CLIENTS[client]
	    return copy.deepcopy(template)
	405
	def _get_innertube_host(self, client='web'):
	    """Return the ``INNERTUBE_HOST`` value stored for *client* in ``INNERTUBE_CLIENTS``."""
	    client_config = INNERTUBE_CLIENTS[client]
	    return client_config['INNERTUBE_HOST']
	408
	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	    """``try_get`` on *ytcfg*, falling back to the default ytcfg of *default_client*.

	    The fallback is used whenever the first lookup is falsy, not only
	    when it is ``None``.
	    """
	    found = try_get(ytcfg, getter, expected_type)
	    if found:
	        return found
	    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
	413
	def _extract_client_name(self, ytcfg, default_client='web'):
	    """Look up the client name string, preferring the top-level key over the context."""
	    getters = (
	        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
	        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
	    )
	    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
	418
	def _extract_client_version(self, ytcfg, default_client='web'):
	    """Look up the client version string, preferring the top-level key over the context."""
	    getters = (
	        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
	        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
	    )
	    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
	423
	def _select_api_hostname(self, req_api_hostname, default_client=None):
	    """Pick the API host name.

	    Precedence: the ``innertube_host`` configuration argument, then
	    *req_api_hostname*, then the stored host of *default_client*
	    (``'web'`` when none is given).
	    """
	    configured = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
	    if configured:
	        return configured
	    if req_api_hostname:
	        return req_api_hostname
	    return self._get_innertube_host(default_client or 'web')
	427
	def _extract_api_key(self, ytcfg=None, default_client='web'):
	    """Return ``INNERTUBE_API_KEY`` from *ytcfg*, or from the default ytcfg of *default_client*."""
	    def read_key(cfg):
	        return cfg['INNERTUBE_API_KEY']

	    return self._ytcfg_get_safe(ytcfg, read_key, compat_str, default_client)
	430
	def _extract_context(self, ytcfg=None, default_client='web'):
	    """Return the ``INNERTUBE_CONTEXT`` dict with language and time zone forced.

	    The context is taken from *ytcfg* or, failing that, from the default
	    ytcfg of *default_client*.
	    """
	    candidates = (ytcfg, self._get_default_ytcfg(default_client))
	    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
	    # Enforce language and tz for extraction
	    # NOTE(review): if the context came from the caller's ytcfg, this update
	    # presumably mutates the caller's dict - confirm against get_first/traverse_obj
	    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	    client_context.update(hl='en', timeZone='UTC', utcOffsetMinutes=0)
	    return context
	438
	439	_SAPISID = None
	440
	441	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	442	time_now = round(time.time())
	443	if self._SAPISID is None:
	444	yt_cookies = self._get_cookies('https://www.youtube.com')
	445	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	446	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	447	sapisid_cookie = dict_get(
	448	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	449	if sapisid_cookie and sapisid_cookie.value:
	450	self._SAPISID = sapisid_cookie.value
	451	self.write_debug('Extracted SAPISID cookie')
	452	# SAPISID cookie is required if not already present
	453	if not yt_cookies.get('SAPISID'):
	454	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	455	self._set_cookie(
	456	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	457	else:
	458	self._SAPISID = False
	459	if not self._SAPISID:
	460	return None
	461	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	462	sapisidhash = hashlib.sha1(
	463	f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
	464	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	465
	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	              note='Downloading API JSON', errnote='Unable to download API page',
	              context=None, api_key=None, api_hostname=None, default_client='web'):
	    """Request ``/youtubei/v1/<ep>`` and return the result of ``_download_json``.

	    The JSON-encoded request data is ``{'context': ...}`` merged with
	    *query*; *context* defaults to the extracted context of
	    *default_client*.  *headers*, when given, override the generated API
	    headers.  The ``innertube_key`` configuration argument takes
	    precedence over *api_key*, which takes precedence over the default
	    key of *default_client*.
	    """
	    payload = {'context': context or self._extract_context(default_client=default_client)}
	    payload.update(query)

	    request_headers = self.generate_api_headers(default_client=default_client)
	    request_headers['content-type'] = 'application/json'
	    request_headers.update(headers or {})

	    forced_key = self._configuration_arg(
	        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
	    api_key = forced_key or api_key or self._extract_api_key(default_client=default_client)

	    host = self._select_api_hostname(api_hostname, default_client)
	    return self._download_json(
	        f'https://{host}/youtubei/v1/{ep}',
	        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	        data=json.dumps(payload).encode('utf8'), headers=request_headers,
	        query={'key': api_key, 'prettyPrint': 'false'})
	483
	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	    """Search *webpage* for the JSON that follows ``_YT_INITIAL_DATA_RE`` via ``_search_json``."""
	    pattern = self._YT_INITIAL_DATA_RE
	    return self._search_json(pattern, webpage, 'yt initial data', item_id, fatal=fatal)
	486
	@staticmethod
	def _extract_session_index(*data):
	    """
	    Index of current account in account list.
	    See: https://github.com/yt-dlp/yt-dlp/pull/519

	    Returns the first ``SESSION_INDEX`` among *data* that converts to an
	    int, or ``None`` when there is none.
	    """
	    # Lazy generator: later configs are not inspected once an index is found
	    indexes = (int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX'])) for cfg in data)
	    return next((index for index in indexes if index is not None), None)
	497
	498	# Deprecated?
	499	def _extract_identity_token(self, ytcfg=None, webpage=None):
	500	if ytcfg:

1

import base64

import calendar

import copy

import datetime

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

from .common import InfoExtractor, SearchInfoExtractor

18

from ..compat import functools # isort: split

19

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

25

compat_urllib_parse_urlencode,

26

compat_urllib_parse_urlparse,

27

compat_urlparse,

28

)

29

from ..jsinterp import JSInterpreter

30

from ..utils import (

NO_DEFAULT,

ExtractorError,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicity requested by the user

74

INNERTUBE_CLIENTS = {

75

'web': {

76

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

77

'INNERTUBE_CONTEXT': {

78

'client': {

79

'clientName': 'WEB',

80

'clientVersion': '2.20211221.00.00',

81

}

82

},

83

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

84

},

85

'web_embedded': {

86

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

87

'INNERTUBE_CONTEXT': {

88

'client': {

89

'clientName': 'WEB_EMBEDDED_PLAYER',

90

'clientVersion': '1.20211215.00.01',

91

},

92

},

93

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

94

},

95

'web_music': {

96

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

97

'INNERTUBE_HOST': 'music.youtube.com',

98

'INNERTUBE_CONTEXT': {

99

'client': {

100

'clientName': 'WEB_REMIX',

101

'clientVersion': '1.20211213.00.00',

102

}

103

},

104

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

105

},

106

'web_creator': {

107

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

108

'INNERTUBE_CONTEXT': {

109

'client': {

110

'clientName': 'WEB_CREATOR',

111

'clientVersion': '1.20211220.02.00',

112

}

113

},

114

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

115

},

116

'android': {

117

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

118

'INNERTUBE_CONTEXT': {

119

'client': {

120

'clientName': 'ANDROID',

121

'clientVersion': '16.49',

122

}

123

},

124

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

125

'REQUIRE_JS_PLAYER': False

126

},

127

'android_embedded': {

128

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

129

'INNERTUBE_CONTEXT': {

130

'client': {

131

'clientName': 'ANDROID_EMBEDDED_PLAYER',

132

'clientVersion': '16.49',

133

},

134

},

135

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

136

'REQUIRE_JS_PLAYER': False

137

},

138

'android_music': {

139

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

140

'INNERTUBE_CONTEXT': {

141

'client': {

142

'clientName': 'ANDROID_MUSIC',

143

'clientVersion': '4.57',

144

}

145

},

146

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

147

'REQUIRE_JS_PLAYER': False

148

},

149

'android_creator': {

150

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

151

'INNERTUBE_CONTEXT': {

152

'client': {

153

'clientName': 'ANDROID_CREATOR',

154

'clientVersion': '21.47',

155

},

156

},

157

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

158

'REQUIRE_JS_PLAYER': False

159

},

160

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

161

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

162

'ios': {

163

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

164

'INNERTUBE_CONTEXT': {

165

'client': {

166

'clientName': 'IOS',

167

'clientVersion': '16.46',

168

'deviceModel': 'iPhone14,3',

169

}

170

},

171

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

172

'REQUIRE_JS_PLAYER': False

173

},

174

'ios_embedded': {

175

'INNERTUBE_CONTEXT': {

176

'client': {

177

'clientName': 'IOS_MESSAGES_EXTENSION',

178

'clientVersion': '16.46',

179

'deviceModel': 'iPhone14,3',

180

},

181

},

182

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

183

'REQUIRE_JS_PLAYER': False

184

},

185

'ios_music': {

186

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

187

'INNERTUBE_CONTEXT': {

188

'client': {

189

'clientName': 'IOS_MUSIC',

190

'clientVersion': '4.57',

191

},

192

},

193

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

194

'REQUIRE_JS_PLAYER': False

195

},

196

'ios_creator': {

197

'INNERTUBE_CONTEXT': {

198

'client': {

199

'clientName': 'IOS_CREATOR',

200

'clientVersion': '21.47',

201

},

202

},

203

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

204

'REQUIRE_JS_PLAYER': False

205

},

206

# mweb has 'ultralow' formats

207

# See: https://github.com/yt-dlp/yt-dlp/pull/557

208

'mweb': {

209

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

210

'INNERTUBE_CONTEXT': {

211

'client': {

212

'clientName': 'MWEB',

213

'clientVersion': '2.20211221.01.00',

214

}

215

},

216

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

217

},

218

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

219

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

220

'tv_embedded': {

221

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

222

'INNERTUBE_CONTEXT': {

223

'client': {

224

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

225

'clientVersion': '2.0',

226

},

227

},

228

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

234

variant, *base = client_name.rsplit('.', 1)

235

if base:

236

return variant, base[0], variant

237

base, *variant = client_name.split('_', 1)

238

return client_name, base, variant[0] if variant else None

239

240

241

def build_innertube_clients():

242

THIRD_PARTY = {

243

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

244

}

245

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

246

priority = qualities(BASE_CLIENTS[::-1])

247

248

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

249

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

250

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

251

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

252

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

253

254

_, base_client, variant = _split_innertube_client(client)

255

ytcfg['priority'] = 10 * priority(base_client)

256

257

if not variant:

258

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

259

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

260

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

261

embedscreen['priority'] -= 3

262

elif variant == 'embedded':

263

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

264

ytcfg['priority'] -= 2

265

else:

266

ytcfg['priority'] -= 3

267

268

269

build_innertube_clients()

270

271

272

class YoutubeBaseInfoExtractor(InfoExtractor):

273

"""Provide base functions for Youtube extractors"""

274

275

_RESERVED_NAMES = (

276

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

282

283

# _NETRC_MACHINE = 'youtube'

284

285

# If True it will raise an error if no login info is provided

286

_LOGIN_REQUIRED = False

287

288

_INVIDIOUS_SITES = (

289

# invidious-redirect websites

290

r'(?:www\.)?redirect\.invidious\.io',

291

r'(?:(?:www|dev)\.)?invidio\.us',

292

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

293

r'(?:www\.)?invidious\.pussthecat\.org',

294

r'(?:www\.)?invidious\.zee\.li',

295

r'(?:www\.)?invidious\.ethibox\.fr',

296

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

297

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

298

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

299

# youtube-dl invidious instances list

300

r'(?:(?:www|no)\.)?invidiou\.sh',

301

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

302

r'(?:www\.)?invidious\.kabi\.tk',

303

r'(?:www\.)?invidious\.mastodon\.host',

304

r'(?:www\.)?invidious\.zapashcanon\.fr',

305

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

306

r'(?:www\.)?invidious\.tinfoil-hat\.net',

307

r'(?:www\.)?invidious\.himiko\.cloud',

308

r'(?:www\.)?invidious\.reallyancient\.tech',

309

r'(?:www\.)?invidious\.tube',

310

r'(?:www\.)?invidiou\.site',

311

r'(?:www\.)?invidious\.site',

312

r'(?:www\.)?invidious\.xyz',

313

r'(?:www\.)?invidious\.nixnet\.xyz',

314

r'(?:www\.)?invidious\.048596\.xyz',

315

r'(?:www\.)?invidious\.drycat\.fr',

316

r'(?:www\.)?inv\.skyn3t\.in',

317

r'(?:www\.)?tube\.poal\.co',

318

r'(?:www\.)?tube\.connect\.cafe',

319

r'(?:www\.)?vid\.wxzm\.sx',

320

r'(?:www\.)?vid\.mint\.lgbt',

321

r'(?:www\.)?vid\.puffyan\.us',

322

r'(?:www\.)?yewtu\.be',

323

r'(?:www\.)?yt\.elukerio\.org',

324

r'(?:www\.)?yt\.lelux\.fi',

325

r'(?:www\.)?invidious\.ggc-project\.de',

326

r'(?:www\.)?yt\.maisputain\.ovh',

327

r'(?:www\.)?ytprivate\.com',

328

r'(?:www\.)?invidious\.13ad\.de',

329

r'(?:www\.)?invidious\.toot\.koeln',

330

r'(?:www\.)?invidious\.fdn\.fr',

331

r'(?:www\.)?watch\.nettohikari\.com',

332

r'(?:www\.)?invidious\.namazso\.eu',

333

r'(?:www\.)?invidious\.silkky\.cloud',

334

r'(?:www\.)?invidious\.exonip\.de',

335

r'(?:www\.)?invidious\.riverside\.rocks',

336

r'(?:www\.)?invidious\.blamefran\.net',

337

r'(?:www\.)?invidious\.moomoo\.de',

338

r'(?:www\.)?ytb\.trom\.tf',

339

r'(?:www\.)?yt\.cyberhost\.uk',

340

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

341

r'(?:www\.)?qklhadlycap4cnod\.onion',

342

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

343

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

344

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

345

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

346

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

347

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

348

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

349

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

350

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

351

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

352

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

353

r'(?:www\.)?piped\.kavin\.rocks',

354

r'(?:www\.)?piped\.silkky\.cloud',

355

r'(?:www\.)?piped\.tokhmi\.xyz',

356

r'(?:www\.)?piped\.moomoo\.me',

357

r'(?:www\.)?il\.ax',

358

r'(?:www\.)?piped\.syncpundit\.com',

359

r'(?:www\.)?piped\.mha\.fi',

360

r'(?:www\.)?piped\.mint\.lgbt',

361

r'(?:www\.)?piped\.privacy\.com\.de',

362

)

363

364

def _initialize_consent(self):

365

cookies = self._get_cookies('https://www.youtube.com/')

366

if cookies.get('__Secure-3PSID'):

367

return

368

consent_id = None

369

consent = cookies.get('CONSENT')

370

if consent:

371

if 'YES' in consent.value:

372

return

373

consent_id = self._search_regex(

374

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

375

if not consent_id:

376

consent_id = random.randint(100, 999)

377

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

378

379

def _initialize_pref(self):

380

cookies = self._get_cookies('https://www.youtube.com/')

381

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))

386

except ValueError:

387

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

388

pref.update({'hl': 'en', 'tz': 'UTC'})

389

self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))

390

391

def _real_initialize(self):

392

self._initialize_pref()

393

self._initialize_consent()

394

self._check_login_required()

395

396

def _check_login_required(self):

397

if self._LOGIN_REQUIRED and not self._cookies_passed:

398

self.raise_login_required('Login details are needed to download this content', method='cookies')

399

400

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

401

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

402

403

def _get_default_ytcfg(self, client='web'):

404

return copy.deepcopy(INNERTUBE_CLIENTS[client])

405

406

def _get_innertube_host(self, client='web'):

407

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

408

409

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

410

# try_get but with fallback to default ytcfg client values when present

411

_func = lambda y: try_get(y, getter, expected_type)

412

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

413

414

def _extract_client_name(self, ytcfg, default_client='web'):

415

return self._ytcfg_get_safe(

416

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

417

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

418

419

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version string from `ytcfg`, or from the defaults of `default_client`."""
    version_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, compat_str, default_client)

423

424

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API hostname to use.

    Precedence: first value of the `innertube_host` configuration arg, then
    `req_api_hostname`, then the INNERTUBE_HOST of `default_client`
    (or of 'web' when no default client is given).
    """
    configured = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured:
        return configured
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

427

428

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, or from the defaults of `default_client`."""
    def key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, key_getter, compat_str, default_client)

430

431

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the INNERTUBE_CONTEXT dict with language and timezone forced.

    The context is taken from `ytcfg` when it provides one, otherwise from
    the defaults of `default_client`.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction.
    # NOTE(review): presumably `traverse_obj` hands back the nested dict itself,
    # so these writes land inside `context` - confirm against traverse_obj.
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context['hl'] = 'en'
    client_context['timeZone'] = 'UTC'
    client_context['utcOffsetMinutes'] = 0
    return context

_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

442

time_now = round(time.time())

443

if self._SAPISID is None:

444

yt_cookies = self._get_cookies('https://www.youtube.com')

445

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

446

# See: https://github.com/yt-dlp/yt-dlp/issues/393

447

sapisid_cookie = dict_get(

448

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

449

if sapisid_cookie and sapisid_cookie.value:

450

self._SAPISID = sapisid_cookie.value

451

self.write_debug('Extracted SAPISID cookie')

452

# SAPISID cookie is required if not already present

453

if not yt_cookies.get('SAPISID'):

454

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

455

self._set_cookie(

456

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

457

else:

458

self._SAPISID = False

459

if not self._SAPISID:

460

return None

461

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

462

sapisidhash = hashlib.sha1(

463

f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()

464

return f'SAPISIDHASH {time_now}_{sapisidhash}'

465

466

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` to the `/youtubei/v1/<ep>` endpoint and return the parsed JSON.

    The request body is `{'context': ...}` updated with `query`; a falsy
    `context` is replaced by the default context of `default_client`.
    Caller-supplied `headers` override the generated ones. The API key is the
    `innertube_key` configuration arg, else `api_key`, else the default key.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    endpoint = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
    return self._download_json(
        endpoint,
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})

483

484

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Return the JSON object that follows the `ytInitialData =` assignment in `webpage`."""
    initial_data = self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
    return initial_data

486

487

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts
    to an int, or None when there is none.
    """
    indices = (int_or_none(try_get(cfg, lambda c: c['SESSION_INDEX'])) for cfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from `ytcfg['ID_TOKEN']`, else from `webpage`, else None."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

508

509

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated:
            return delegated
        parts = (try_get(source, datasync_getters, compat_str) or '').split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) < 2 or not parts[1]:
            continue
        return parts[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_paths, expected_type=str)

537

538

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated; computed once per instance."""
    auth_header = self._generate_sapisidhash_header()
    return True if auth_header else False

541

542

def extract_ytcfg(self, video_id, webpage):
    """Return the object passed to the first `ytcfg.set({...});` call in `webpage`.

    Returns an empty dict when `webpage` is falsy, when no such call is
    found, or when the captured text does not parse to a truthy value.
    """
    if not webpage:
        return {}
    # The literal parentheses of the call must be escaped; an unescaped `$`
    # here is an end-of-string anchor and the pattern could never match.
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_config, video_id, fatal=False) or {}

549

550

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an API request.

    Explicit arguments take precedence over values extracted from `ytcfg`.
    `X-Goog-AuthUser` is added when an account sync id or a session index
    is known (0 when only the sync id is). `Authorization` and `X-Origin`
    are added when a SAPISIDHASH value can be generated. Headers whose
    value is None are left out of the result.
    """
    origin = f'https://{self._select_api_hostname(api_hostname, default_client)}'
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    candidate_headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        candidate_headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        candidate_headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        candidate_headers['Authorization'] = auth
        candidate_headers['X-Origin'] = origin

    return {name: value for name, value in candidate_headers.items() if value is not None}

574

575

def _download_ytcfg(self, client, video_id):
    """Download the page for `client` and return the ytcfg found in it.

    Only 'web', 'web_music' and 'web_embedded' have a page to fetch; any
    other client yields an empty dict without a request.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    page_url = config_pages.get(client)
    if not page_url:
        return {}
    client_label = client.replace('_', ' ').strip()
    webpage = self._download_webpage(
        page_url, video_id, fatal=False, note=f'Downloading {client_label} client config')
    return self.extract_ytcfg(video_id, webpage) or {}

586

587

@staticmethod

588

def _build_api_continuation_query(continuation, ctp=None):

589

query = {

590

'continuation': continuation

591

}

592

# TODO: Inconsistency with clickTrackingParams.

593

# Currently we have a fixed ctp contained within context (from ytcfg)

594

# and a ctp in root query for continuation.

595

if ctp:

596

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from the renderer's next/reload continuation data.

    Returns None when the renderer has no such data or it has no token.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

611

612

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or carries no
    `continuationCommand.token` string.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

621

622

@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for `renderer`, or None if it has none.

    The renderer's own next/reload continuation data is preferred; otherwise
    the entries under 'contents' and 'items' are searched for a
    continuationItemRenderer carrying an endpoint or button command.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(entry, endpoint_getters, dict)
        found = cls._extract_continuation_ep_data(endpoint)
        if found:
            return found

@classmethod
def _extract_alerts(cls, data):
    """Yield `(alert_type, message)` for every alert in `data['alerts']`.

    Entries of the alerts list that are not dicts are skipped, and so are
    alert values that are not dicts, have no 'type', or have no text.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Same guard as on the outer level: a non-dict value has no
            # `.get` and would otherwise abort the whole iteration.
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

655

656

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

657

errors = []

658

warnings = []

659

for alert_type, alert_message in alerts:

660

if alert_type.lower() == 'error' and fatal:

661

errors.append([alert_type, alert_message])

662

else:

663

warnings.append([alert_type, alert_message])

664

665

for alert_type, alert_message in (warnings + errors[:-1]):

666

self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

667

if errors:

668

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

669

670

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and pass them to `_report_alerts` with the extra arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

672

673

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased `metadataBadgeRenderer` labels in `renderer['badges']`."""
    badge_entries = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(entry, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for entry in badge_entries)
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of the given paths.

    Each candidate is read as its 'simpleText' string, or else as the
    concatenated 'text' of its 'runs' (a bare list is treated as the runs),
    limited to the first `max_runs` runs when set. With no paths, `data`
    itself is the candidate. Returns None when nothing yields text.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # A path without `...` or list/tuple keys gives a single object
            # rather than a list of matches, so wrap it for the loop below.
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]
        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            run_limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:run_limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined

def _get_count(self, data, *path_list):
    """Return the count parsed from the text at the given paths.

    `parse_count` is tried first; failing that, the leading run of digits
    and commas (after removing all whitespace) is converted with `str_to_int`.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    compact_text = re.sub(r'\s', '', count_text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_digits)

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key

    Returns a list of dicts with 'url', 'height' and 'width'; entries
    without a valid URL are dropped.
    """
    collected = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            thumbnail_url = url_or_none(entry.get('url'))
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            collected.append({
                'url': thumbnail_url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return collected

@staticmethod

def extract_relative_time(relative_time_text):

737

"""

738

Extracts a relative time from string and converts to dt object

739

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

"""

if mobj:

start = mobj.group('start')

744

if start:

745

return datetime_from_str(start)

746

try:

747

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _extract_time_text(self, renderer, *path_list):
    """@returns (timestamp, time_text)

    The text is first read as a relative time ("6 days ago"), then as an
    absolute date, then as a date fragment cut out of the lower-cased text.
    A warning is reported (once) when non-empty text yields no timestamp.
    """
    text = self._get_text(renderer, *path_list) or ''

    timestamp = None
    relative = self.extract_relative_time(text)
    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_fragment = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_fragment)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text

769

770

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API with retries and return the response.

    Retries (up to the `extractor_retries` param, default 3) happen on
    network errors other than HTTP 403/429, on alerts containing
    'unknown error', and when none of `check_get_keys` is present in the
    response. When `fatal` is false, failures are reported as warnings and
    None is returned instead of raising.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        retry_suffix = ' (retry #%d)' % count if count else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                is_http_error = isinstance(e.cause, compat_HTTPError)
                if is_http_error:
                    # Surface the API's own error message when the body is not an HTML page
                    first_bytes = e.cause.read(512)
                    if not is_html(first_bytes):
                        yt_error = try_get(
                            self._parse_json(
                                self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not is_http_error or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                if 'unknown error' in e.msg.lower():
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response

@staticmethod

def is_music_url(url):

844

return re.match(r'https?://music\.youtube\.com/', url) is not None

845

846

def _extract_video(self, renderer):
    """Turn a video renderer dict into a `url`-type result for YoutubeIE.

    The URL points at `/shorts/<id>` when the overlay style is 'SHORTS' or
    the navigation URL contains '/shorts/', otherwise at `/watch?v=<id>`.
    'upload_date' is filled only when the `approximate_date` configuration
    arg is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # Fall back to the duration spelled out in the accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
                        if self._configuration_arg('approximate_date', ie_key='youtubetab')
                        else None),
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

903

IE_DESC = 'YouTube'

904

_VALID_URL = r"""(?x)^

905

(

906

(?:https?://|//) # http(s):// or protocol-independent URL

907

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

908

(?:www\.)?deturl\.com/www\.youtube\.com|

909

(?:www\.)?pwnyoutube\.com|

910

(?:www\.)?hooktube\.com|

911

(?:www\.)?yourepeat\.com|

912

tube\.majestyc\.net|

913

%(invidious)s|

914

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

915

(?:.*?\#/)? # handle anchor (#/) redirect urls

916

(?: # the various things that can precede the ID:

917

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

918

|(?: # or the v= param in all its forms

919

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

920

(?:\?|\#!?) # the params delimiter ? or # or #!

921

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

927

vid\.plus| # or vid.plus/xxxx

928

zwearz\.com/watch| # or zwearz.com/watch/xxxx

929

%(invidious)s

930

)/

931

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

932

)

933

)? # all until now is optional -> you can pass the naked ID

934

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

935

(?(1).+)? # if we found the ID, everything can follow

936

(?:\#|$)""" % {

937

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

938

}

939

_PLAYER_INFO_RE = (

940

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

941

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

942

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

943

)

944

_formats = {

945

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

946

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

947

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

948

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

949

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

950

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

951

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

952

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

953

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

954

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

955

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

956

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

957

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

958

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

959

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

960

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

961

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

962

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

967

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

968

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

969

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

970

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

971

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

972

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

973

974

# Apple HTTP Live Streaming

975

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

976

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

977

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

978

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

979

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

980

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

981

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

982

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

983

984

# DASH mp4 video

985

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

986

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

987

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

988

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

989

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

990

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

991

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

992

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

993

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

994

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

995

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

996

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

997

998

# Dash mp4 audio

999

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

1000

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

1001

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

1002

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1003

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1004

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

1005

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

1006

1007

# Dash webm

1008

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1009

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1010

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1011

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1012

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1013

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1014

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1015

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1016

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1017

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1018

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1019

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1020

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1021

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1022

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1023

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1024

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1025

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1026

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1027

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1028

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1029

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1030

1031

# Dash webm audio

1032

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1033

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1034

1035

# Dash webm audio with opus inside

1036

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1037

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1038

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1039

1040

# RTMP (unnamed)

1041

'_rtmp': {'protocol': 'rtmp'},

1042

1043

# av01 video only formats sometimes served with "unknown" codecs

1044

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1045

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1046

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1047

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1048

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1049

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1050

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1051

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1052

}

1053

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1065

'uploader': 'Philipp Hagemeister',

1066

'uploader_id': 'phihag',

1067

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1068

'channel': 'Philipp Hagemeister',

1069

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1070

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1071

'upload_date': '20121002',

1072

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1073

'categories': ['Science & Technology'],

1074

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1079

'playable_in_embed': True,

1080

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1081

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1090

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1095

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1096

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1097

'uploader': 'SET India',

1098

'uploader_id': 'setindia',

1099

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1100

'age_limit': 18,

1101

},

1102

'skip': 'Private video',

1103

},

1104

{

1105

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1106

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1111

'uploader': 'Philipp Hagemeister',

1112

'uploader_id': 'phihag',

1113

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1114

'channel': 'Philipp Hagemeister',

1115

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1116

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1117

'upload_date': '20121002',

1118

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1119

'categories': ['Science & Technology'],

1120

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1125

'playable_in_embed': True,

1126

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1127

'live_status': 'not_live',

1128

'age_limit': 0,

1129

'channel_follower_count': int

1130

},

1131

'params': {

1132

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1137

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1142

'uploader_id': '8KVIDEO',

1143

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1144

'description': '',

1145

'uploader': '8KVIDEO',

1146

'title': 'UHDTV TEST 8K VIDEO.mp4'

1147

},

1148

'params': {

1149

'youtube_include_dash_manifest': True,

1150

'format': '141',

1151

},

1152

'skip': 'format 141 not served anymore',

1153

},

1154

# DASH manifest with encrypted signature

1155

{

1156

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1161

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1162

'duration': 244,

1163

'uploader': 'AfrojackVEVO',

1164

'uploader_id': 'AfrojackVEVO',

1165

'upload_date': '20131011',

1166

'abr': 129.495,

1167

'like_count': int,

1168

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1169

'playable_in_embed': True,

1170

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1171

'view_count': int,

1172

'track': 'The Spark',

1173

'live_status': 'not_live',

1174

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1175

'channel': 'Afrojack',

1176

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1177

'tags': 'count:19',

1178

'availability': 'public',

1179

'categories': ['Music'],

1180

'age_limit': 0,

1181

'alt_title': 'The Spark',

1182

'channel_follower_count': int

1183

},

1184

'params': {

1185

'youtube_include_dash_manifest': True,

1186

'format': '141/bestaudio[ext=m4a]',

1187

},

1188

},

1189

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1190

{

1191

'note': 'Embed allowed age-gate video',

1192

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1197

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1198

'duration': 142,

1199

'uploader': 'The Witcher',

1200

'uploader_id': 'WitcherGame',

1201

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1202

'upload_date': '20140605',

1203

'age_limit': 18,

1204

'categories': ['Gaming'],

1205

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1206

'availability': 'needs_auth',

1207

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1208

'like_count': int,

1209

'channel': 'The Witcher',

1210

'live_status': 'not_live',

1211

'tags': 'count:17',

1212

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1213

'playable_in_embed': True,

1214

'view_count': int,

1215

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1220

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1225

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1226

'upload_date': '20200408',

1227

'uploader_id': 'FlyingKitty900',

1228

'uploader': 'FlyingKitty',

1229

'age_limit': 18,

1230

'availability': 'needs_auth',

1231

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1232

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1233

'channel': 'FlyingKitty',

1234

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1235

'view_count': int,

1236

'categories': ['Entertainment'],

1237

'live_status': 'not_live',

1238

'tags': ['Flyingkitty', 'godzilla 2'],

1239

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1240

'like_count': int,

1241

'duration': 177,

1242

'playable_in_embed': True,

1243

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1248

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1249

'info_dict': {

1250

'id': 'Tq92D6wQ1mg',

1251

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1252

'ext': 'mp4',

1253

'upload_date': '20191228',

1254

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1255

'uploader': 'Projekt Melody',

1256

'description': 'md5:17eccca93a786d51bc67646756894066',

1257

'age_limit': 18,

1258

'like_count': int,

1259

'availability': 'needs_auth',

1260

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1261

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1262

'view_count': int,

1263

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1264

'channel': 'Projekt Melody',

1265

'live_status': 'not_live',

1266

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1267

'playable_in_embed': True,

1268

'categories': ['Entertainment'],

1269

'duration': 106,

1270

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1271

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1276

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1281

'uploader': 'Herr Lurik',

1282

'uploader_id': 'st3in234',

1283

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1284

'upload_date': '20130730',

1285

'track': 'Such mich find mich',

1286

'age_limit': 0,

1287

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1288

'like_count': int,

1289

'playable_in_embed': False,

1290

'creator': 'OOMPH!',

1291

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1292

'view_count': int,

1293

'alt_title': 'Such mich find mich',

1294

'duration': 210,

1295

'channel': 'Herr Lurik',

1296

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1297

'categories': ['Music'],

1298

'availability': 'public',

1299

'uploader_url': 'http://www.youtube.com/user/st3in234',

1300

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1301

'live_status': 'not_live',

1302

'artist': 'OOMPH!',

1303

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1308

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1309

'only_matching': True,

1310

},

1311

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1312

# YouTube Red ad is not captured for creator

1313

{

1314

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1320

'uploader_id': 'deadmau5',

1321

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1322

'creator': 'deadmau5',

1323

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1324

'uploader': 'deadmau5',

1325

'title': 'Deadmau5 - Some Chords (HD)',

1326

'alt_title': 'Some Chords',

1327

'availability': 'public',

1328

'tags': 'count:14',

1329

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1330

'view_count': int,

1331

'live_status': 'not_live',

1332

'channel': 'deadmau5',

1333

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1334

'like_count': int,

1335

'track': 'Some Chords',

1336

'artist': 'deadmau5',

1337

'playable_in_embed': True,

1338

'age_limit': 0,

1339

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1340

'categories': ['Music'],

1341

'album': 'Some Chords',

1342

'channel_follower_count': int

1343

},

1344

'expected_warnings': [

1345

'DASH manifest missing',

1346

]

1347

},

1348

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1349

{

1350

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1356

'uploader_id': 'olympic',

1357

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1358

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1359

'uploader': 'Olympics',

1360

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1361

'like_count': int,

1362

'release_timestamp': 1343767800,

1363

'playable_in_embed': True,

1364

'categories': ['Sports'],

1365

'release_date': '20120731',

1366

'channel': 'Olympics',

1367

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1368

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1369

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1370

'age_limit': 0,

1371

'availability': 'public',

1372

'live_status': 'was_live',

1373

'view_count': int,

1374

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1375

'channel_follower_count': int

1376

},

1377

'params': {

1378

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1388

'duration': 85,

1389

'upload_date': '20110310',

1390

'uploader_id': 'AllenMeow',

1391

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1392

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1393

'uploader': '孫ᄋᄅ',

1394

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1395

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1400

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1401

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1402

'view_count': int,

1403

'categories': ['People & Blogs'],

1404

'like_count': int,

1405

'live_status': 'not_live',

1406

'availability': 'unlisted',

1407

'channel_follower_count': int

1408

},

1409

},

1410

# url_encoded_fmt_stream_map is empty string

1411

{

1412

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1417

'description': '',

1418

'upload_date': '20150404',

1419

'uploader_id': 'spbelect',

1420

'uploader': 'Наблюдатели Петербурга',

1421

},

1422

'params': {

1423

'skip_download': 'requires avconv',

1424

},

1425

'skip': 'This live event has ended.',

1426

},

1427

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1428

{

1429

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1434

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1435

'duration': 220,

1436

'upload_date': '20150625',

1437

'uploader_id': 'dorappi2000',

1438

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1439

'uploader': 'dorappi2000',

1440

'formats': 'mincount:31',

1441

},

1442

'skip': 'not actual anymore',

1443

},

1444

# DASH manifest with segment_list

1445

{

1446

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1447

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1452

'uploader': 'Airtek',

1453

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1454

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1455

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1456

},

1457

'params': {

1458

'youtube_include_dash_manifest': True,

1459

'format': '135', # bestvideo

1460

},

1461

'skip': 'This live event has ended.',

1462

},

1463

{

1464

# Multifeed videos (multiple cameras), URL is for Main Camera

1465

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1466

'info_dict': {

1467

'id': 'jvGDaLqkpTg',

1468

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1469

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1476

'description': 'md5:e03b909557865076822aa169218d6a5d',

1477

'duration': 10643,

1478

'upload_date': '20161111',

1479

'uploader': 'Team PGP',

1480

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1481

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1488

'description': 'md5:e03b909557865076822aa169218d6a5d',

1489

'duration': 10991,

1490

'upload_date': '20161111',

1491

'uploader': 'Team PGP',

1492

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1493

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1500

'description': 'md5:e03b909557865076822aa169218d6a5d',

1501

'duration': 10995,

1502

'upload_date': '20161111',

1503

'uploader': 'Team PGP',

1504

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1505

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1512

'description': 'md5:e03b909557865076822aa169218d6a5d',

1513

'duration': 10990,

1514

'upload_date': '20161111',

1515

'uploader': 'Team PGP',

1516

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1517

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1522

},

1523

'skip': 'Not multifeed anymore',

1524

},

1525

{

1526

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1527

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1528

'info_dict': {

1529

'id': 'gVfLd0zydlo',

1530

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1531

},

1532

'playlist_count': 2,

1533

'skip': 'Not multifeed anymore',

1534

},

1535

{

1536

'url': 'https://vid.plus/FlRa-iH7PGw',

1537

'only_matching': True,

1538

},

1539

{

1540

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1541

'only_matching': True,

1542

},

1543

{

1544

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1545

# Also tests cut-off URL expansion in video description (see

1546

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1547

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1548

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1553

'alt_title': 'Dark Walk',

1554

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1555

'duration': 133,

1556

'upload_date': '20151119',

1557

'uploader_id': 'IronSoulElf',

1558

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1559

'uploader': 'IronSoulElf',

1560

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1561

'track': 'Dark Walk',

1562

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1563

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1564

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1565

'categories': ['Film & Animation'],

1566

'view_count': int,

1567

'live_status': 'not_live',

1568

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1569

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1570

'tags': 'count:13',

1571

'availability': 'public',

1572

'channel': 'IronSoulElf',

1573

'playable_in_embed': True,

1574

'like_count': int,

1575

'age_limit': 0,

1576

'channel_follower_count': int

1577

},

1578

'params': {

1579

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1584

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1585

'only_matching': True,

1586

},

1587

{

1588

# Video with yt:stretch=17:0

1589

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1594

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1595

'upload_date': '20151107',

1596

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1597

'uploader': 'CH GAMER DROID',

1598

},

1599

'params': {

1600

'skip_download': True,

1601

},

1602

'skip': 'This video does not exist.',

1603

},

1604

{

1605

# Video with incomplete 'yt:stretch=16:'

1606

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1607

'only_matching': True,

1608

},

1609

{

1610

# Video licensed under Creative Commons

1611

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1616

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1617

'duration': 721,

1618

'upload_date': '20150128',

1619

'uploader_id': 'BerkmanCenter',

1620

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1621

'uploader': 'The Berkman Klein Center for Internet & Society',

1622

'license': 'Creative Commons Attribution license (reuse allowed)',

1623

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1624

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1625

'like_count': int,

1626

'age_limit': 0,

1627

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1628

'channel': 'The Berkman Klein Center for Internet & Society',

1629

'availability': 'public',

1630

'view_count': int,

1631

'categories': ['Education'],

1632

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1633

'live_status': 'not_live',

1634

'playable_in_embed': True,

1635

'channel_follower_count': int

1636

},

1637

'params': {

1638

'skip_download': True,

},

},

{

# Channel-like uploader_url

1643

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1648

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1649

'duration': 4060,

1650

'upload_date': '20151120',

1651

'uploader': 'Bernie Sanders',

1652

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1653

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1654

'license': 'Creative Commons Attribution license (reuse allowed)',

1655

'playable_in_embed': True,

1656

'tags': 'count:12',

1657

'like_count': int,

1658

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1659

'age_limit': 0,

1660

'availability': 'public',

1661

'categories': ['News & Politics'],

1662

'channel': 'Bernie Sanders',

1663

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1664

'view_count': int,

1665

'live_status': 'not_live',

1666

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1667

'channel_follower_count': int

1668

},

1669

'params': {

1670

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1675

'only_matching': True,

1676

},

1677

{

1678

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1679

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1680

'only_matching': True,

1681

},

1682

{

1683

# Rental video preview

1684

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1689

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1690

'upload_date': '20150811',

1691

'uploader': 'FlixMatrix',

1692

'uploader_id': 'FlixMatrixKaravan',

1693

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1694

'license': 'Standard YouTube License',

1695

},

1696

'params': {

1697

'skip_download': True,

1698

},

1699

'skip': 'This video is not available.',

1700

},

1701

{

1702

# YouTube Red video with episode data

1703

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1708

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1709

'duration': 2085,

1710

'upload_date': '20170118',

1711

'uploader': 'Vsauce',

1712

'uploader_id': 'Vsauce',

1713

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1714

'series': 'Mind Field',

1715

'season_number': 1,

1716

'episode_number': 1,

1717

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1718

'tags': 'count:12',

1719

'view_count': int,

1720

'availability': 'public',

1721

'age_limit': 0,

1722

'channel': 'Vsauce',

1723

'episode': 'Episode 1',

1724

'categories': ['Entertainment'],

1725

'season': 'Season 1',

1726

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1727

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1728

'like_count': int,

1729

'playable_in_embed': True,

1730

'live_status': 'not_live',

1731

'channel_follower_count': int

1732

},

1733

'params': {

1734

'skip_download': True,

1735

},

1736

'expected_warnings': [

1737

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1742

# as inappropriate or offensive to some audiences.

1743

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1748

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1749

'duration': 965,

1750

'upload_date': '20140124',

1751

'uploader': 'New Century Foundation',

1752

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1753

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1754

},

1755

'params': {

1756

'skip_download': True,

1757

},

1758

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1763

'only_matching': True,

1764

},

1765

{

1766

# geo restricted to JP

1767

'url': 'sJL6WA-aGkQ',

1768

'only_matching': True,

1769

},

1770

{

1771

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1772

'only_matching': True,

1773

},

1774

{

1775

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1776

'only_matching': True,

1777

},

1778

{

1779

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1780

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1781

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1786

'only_matching': True,

1787

},

1788

{

1789

# Video with unsupported adaptive stream type formats

1790

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1795

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1796

'duration': 433,

1797

'upload_date': '20130923',

1798

'uploader': 'Amelia Putri Harwita',

1799

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1800

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1801

'formats': 'maxcount:10',

1802

},

1803

'params': {

1804

'skip_download': True,

1805

'youtube_include_dash_manifest': False,

1806

},

1807

'skip': 'not actual anymore',

1808

},

1809

{

1810

# YouTube Music auto-generated description

1811

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1816

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1817

'upload_date': '20190312',

1818

'uploader': 'Stephen - Topic',

1819

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1820

'artist': 'Stephen',

1821

'track': 'Voyeur Girl',

1822

'album': 'it\'s too much love to know my dear',

1823

'release_date': '20190313',

1824

'release_year': 2019,

1825

'alt_title': 'Voyeur Girl',

1826

'view_count': int,

1827

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1828

'playable_in_embed': True,

1829

'like_count': int,

1830

'categories': ['Music'],

1831

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1832

'channel': 'Stephen',

1833

'availability': 'public',

1834

'creator': 'Stephen',

1835

'duration': 169,

1836

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1837

'age_limit': 0,

1838

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1839

'tags': 'count:11',

1840

'live_status': 'not_live',

1841

'channel_follower_count': int

1842

},

1843

'params': {

1844

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1849

'only_matching': True,

1850

},

1851

{

1852

# invalid -> valid video id redirection

1853

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1858

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1859

'upload_date': '20090125',

1860

'uploader': 'Prochorowka',

1861

'uploader_id': 'Prochorowka',

1862

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1863

'artist': 'Panjabi MC',

1864

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1865

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1866

},

1867

'params': {

1868

'skip_download': True,

1869

},

1870

'skip': 'Video unavailable',

1871

},

1872

{

1873

# empty description results in an empty string

1874

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1881

'uploader_id': 'ElevageOrVert',

1882

'uploader': 'ElevageOrVert',

1883

'view_count': int,

1884

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1885

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1886

'like_count': int,

1887

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1888

'tags': [],

1889

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1890

'availability': 'public',

1891

'age_limit': 0,

1892

'categories': ['Pets & Animals'],

1893

'duration': 7,

1894

'playable_in_embed': True,

1895

'live_status': 'not_live',

1896

'channel': 'ElevageOrVert',

1897

'channel_follower_count': int

1898

},

1899

'params': {

1900

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1905

# see [2] for an example with '};' inside ytInitialPlayerResponse

1906

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1907

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1908

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1913

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1914

'upload_date': '20130831',

1915

'uploader_id': 'kudvenkat',

1916

'uploader': 'kudvenkat',

1917

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1918

'like_count': int,

1919

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1920

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1921

'live_status': 'not_live',

1922

'categories': ['Education'],

1923

'availability': 'public',

1924

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1925

'tags': 'count:12',

1926

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1931

'channel_follower_count': int

1932

},

1933

'params': {

1934

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1939

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1940

'only_matching': True,

1941

},

1942

{

1943

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1944

'only_matching': True,

1945

},

1946

{

1947

# https://github.com/ytdl-org/youtube-dl/pull/28094

1948

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1954

'upload_date': '20141120',

1955

'uploader': 'The Cinematic Orchestra - Topic',

1956

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1957

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1958

'artist': 'The Cinematic Orchestra',

1959

'track': 'Burn Out',

1960

'album': 'Every Day',

1961

'like_count': int,

1962

'live_status': 'not_live',

1963

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1968

'creator': 'The Cinematic Orchestra',

1969

'channel': 'The Cinematic Orchestra',

1970

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1971

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1972

'availability': 'public',

1973

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1974

'categories': ['Music'],

1975

'playable_in_embed': True,

1976

'channel_follower_count': int

1977

},

1978

'params': {

1979

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1984

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1985

'only_matching': True,

1986

},

1987

{

1988

# controversial video, requires bpctr/contentCheckOk

1989

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1994

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1995

'uploader': 'CBS Mornings',

1996

'uploader_id': 'CBSThisMorning',

1997

'upload_date': '20140716',

1998

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1999

'duration': 170,

2000

'categories': ['News & Politics'],

2001

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2002

'view_count': int,

2003

'channel': 'CBS Mornings',

2004

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2005

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2006

'age_limit': 18,

2007

'availability': 'needs_auth',

2008

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2009

'like_count': int,

2010

'live_status': 'not_live',

2011

'playable_in_embed': True,

2012

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2017

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2022

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2023

'upload_date': '20201120',

2024

'uploader': 'Walk around Japan',

2025

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2026

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2027

'duration': 1456,

2028

'categories': ['Travel & Events'],

2029

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2030

'view_count': int,

2031

'channel': 'Walk around Japan',

2032

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2033

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2034

'age_limit': 0,

2035

'availability': 'public',

2036

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2037

'live_status': 'not_live',

2038

'playable_in_embed': True,

2039

'channel_follower_count': int

2040

},

2041

'params': {

2042

'skip_download': True,

2043

},

2044

}, {

2045

# Has multiple audio streams

2046

'url': 'WaOKSUlf4TM',

2047

'only_matching': True

2048

}, {

2049

# Requires Premium: has format 141 when requested using YTM url

2050

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2051

'only_matching': True

2052

}, {

2053

# multiple subtitles with same lang_code

2054

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2055

'only_matching': True,

2056

}, {

2057

# Force use android client fallback

2058

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2059

'info_dict': {

2060

'id': 'YOelRv7fMxY',

2061

'title': 'DIGGING A SECRET TUNNEL Part 1',

2062

'ext': '3gp',

2063

'upload_date': '20210624',

2064

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2065

'uploader': 'colinfurze',

2066

'uploader_id': 'colinfurze',

2067

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2068

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2069

'duration': 596,

2070

'categories': ['Entertainment'],

2071

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2072

'view_count': int,

2073

'channel': 'colinfurze',

2074

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2075

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2076

'age_limit': 0,

2077

'availability': 'public',

2078

'like_count': int,

2079

'live_status': 'not_live',

2080

'playable_in_embed': True,

2081

'channel_follower_count': int

2082

},

2083

'params': {

2084

'format': '17', # 3gp format available on android

2085

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2090

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2091

'only_matching': True,

2092

'params': {

2093

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2098

'only_matching': True,

2099

}, {

2100

'note': 'Storyboards',

2101

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2107

'uploader_id': 'scishow',

2108

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2109

'upload_date': '20140324',

2110

'uploader': 'SciShow',

2111

'like_count': int,

2112

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2113

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2114

'view_count': int,

2115

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2116

'playable_in_embed': True,

2117

'tags': 'count:12',

2118

'uploader_url': 'http://www.youtube.com/user/scishow',

2119

'availability': 'public',

2120

'channel': 'SciShow',

2121

'live_status': 'not_live',

2122

'duration': 248,

2123

'categories': ['Education'],

2124

'age_limit': 0,

2125

'channel_follower_count': int

2126

}, 'params': {'format': 'mhtml', 'skip_download': True}

2127

}, {

2128

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2129

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2134

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2135

'uploader': 'Leon Nguyen',

2136

'uploader_id': 'VNSXIII',

2137

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2138

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2139

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2144

'tags': 'count:23',

2145

'playable_in_embed': True,

2146

'live_status': 'not_live',

2147

'upload_date': '20220103',

2148

'like_count': int,

2149

'availability': 'public',

2150

'channel': 'Leon Nguyen',

2151

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2152

'channel_follower_count': int

2153

}

2154

}, {

2155

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2156

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2161

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2162

'uploader': 'Quackity',

2163

'uploader_id': 'QuackityHQ',

2164

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2165

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2166

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2171

'tags': 'count:26',

2172

'playable_in_embed': True,

2173

'live_status': 'not_live',

2174

'release_timestamp': 1641172509,

2175

'release_date': '20220103',

2176

'upload_date': '20220103',

2177

'like_count': int,

2178

'availability': 'public',

2179

'channel': 'Quackity',

2180

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2181

'channel_follower_count': int

2182

}

2183

},

2184

{ # continuous livestream. Microformat upload date should be preferred.

2185

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2186

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2187

'info_dict': {

2188

'id': 'kgx4WGK0oNU',

2189

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2190

'ext': 'mp4',

2191

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2192

'availability': 'public',

2193

'age_limit': 0,

2194

'release_timestamp': 1637975704,

2195

'upload_date': '20210619',

2196

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2197

'live_status': 'is_live',

2198

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2199

'uploader': '阿鲍Abao',

2200

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2201

'channel': 'Abao in Tokyo',

2202

'channel_follower_count': int,

2203

'release_date': '20211127',

2204

'tags': 'count:39',

2205

'categories': ['People & Blogs'],

2206

'like_count': int,

2207

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2208

'view_count': int,

2209

'playable_in_embed': True,

2210

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2211

},

2212

'params': {'skip_download': True}

2213

}, {

2214

# Story. Requires specific player params to work.

2215

# Note: stories get removed after some period of time

2216

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2221

'view_count': int,

2222

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2223

'upload_date': '20220526',

2224

'categories': ['Education'],

2225

'title': 'Story',

2226

'channel': 'IT\'S HISTORY',

2227

'description': '',

2228

'uploader_id': 'BlastfromthePast',

2229

'duration': 12,

2230

'uploader': 'IT\'S HISTORY',

2231

'playable_in_embed': True,

2232

'age_limit': 0,

2233

'live_status': 'not_live',

2234

'tags': [],

2235

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2236

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2237

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2238

}

2239

}, {

2240

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2245

'upload_date': '20220323',

2246

'like_count': int,

2247

'availability': 'unlisted',

2248

'channel': 'nao20010128nao',

2249

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2250

'age_limit': 0,

2251

'uploader': 'nao20010128nao',

2252

'uploader_id': 'nao20010128nao',

2253

'categories': ['Music'],

2254

'view_count': int,

2255

'description': '',

2256

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2257

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2258

'live_status': 'not_live',

2259

'playable_in_embed': True,

2260

'channel_follower_count': int,

2261

'duration': 6,

2262

'tags': [],

2263

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

}

}

]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is judged by the parent class.
    """
    # NOTE(review): parse_qs is imported locally although the module also
    # imports it at top level - presumably deliberate; confirm before removing.
    from ..utils import parse_qs

    playlist_id = parse_qs(url).get('list', [None])[0]
    if playlist_id:
        return False
    return super().suitable(url)

2276

2277

def __init__(self, *args, **kwargs):
    """Initialise the extractor and create its empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # Player JS source keyed by player id (filled by _load_player).
    self._code_cache = dict()
    # Signature / nsig functions and decrypted nsig values
    # (filled by _decrypt_signature and _decrypt_nsig).
    self._player_cache = dict()

2281

2282

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Convert the ``is_from_start`` formats into lazily generated live formats.

    Every entry of *formats* flagged ``is_from_start`` is mutated in place:
    it is marked live, switched to the ``http_dash_segments_generator``
    protocol and given a ``fragments`` callable bound to
    ``_live_dash_fragments``. Entries without the flag are left untouched.
    """
    refresh_lock = threading.Lock()

    is_live = True
    start_time = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses once more than `delay` seconds
        # have passed since the previous fetch; otherwise do nothing.
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        # Only the refresh itself is serialised by the lock.
        with refresh_lock:
            refetch_manifest(format_id, delay)

        for candidate in formats:
            if candidate['format_id'] == format_id:
                return candidate['manifest_url'], candidate['manifest_stream_number'], is_live

        if is_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt['is_live'] = True
        fmt['protocol'] = 'http_dash_segments_generator'
        fmt['fragments'] = functools.partial(
            self._live_dash_fragments, fmt['format_id'], live_start_time, mpd_feed)

2325

2326

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """Generate fragment dicts for a live DASH format, starting from its beginning.

    @param format_id        id of the format whose manifest is followed
    @param live_start_time  start time of the stream in epoch seconds, or a falsy value
    @param mpd_feed         callable ``(format_id, delay)`` returning
                            ``(manifest_url, manifest_stream_number, is_live)`` or None
    @param ctx              mapping; ``start`` is read and ``last_error`` is popped
    @yields                 dicts with ``url`` and ``fragment_count``

    The generator stops when the stream is reported as no longer live or
    when ``no_fragment_score`` exceeds 30.
    """
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Bound before the helper below can read it: the helper returns the
    # enclosing `last_seq` on its failure paths, and without this binding a
    # failed first manifest fetch would raise NameError.
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # Ask for a quick manifest refresh after an HTTP 403 or when the caller insists
        expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # The exception is only used to leave the for loop and
                    # restart the while loop; it is caught just below.
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Wait out the rest of the FETCH_SPAN-second polling interval
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2427

2428

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in any of *ytcfgs*, or None.

    ``PLAYER_JS_URL`` is tried first, then ``jsUrl`` under
    ``WEB_PLAYER_CONTEXT_CONFIGS``. *webpage* is accepted but not used here.
    """
    candidate = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if candidate:
        return urljoin('https://www.youtube.com', candidate)
    return None

2435

2436

def _download_player_url(self, video_id, fatal=False):
    """Derive the player ``base.js`` URL from the iframe API script.

    Returns None when the script download yields nothing or no 8-hex-digit
    player version is found in it. *fatal* is forwarded to both the download
    and the regex search.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None
    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2445

2446

def _signature_cache_id(self, example_sig):

2447

""" Return a string representation of a signature """

2448

return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))

2449

2450

@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern found in *player_url*.

    Raises ExtractorError when none of the patterns matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)

2459

2460

def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS source for *player_url*, downloading it at most once.

    Successful downloads are stored in ``self._code_cache`` under the player
    id. Returns None when the download yields nothing.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    js_code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if not js_code:
        # Nothing is cached on failure, so a later call tries again
        return None
    self._code_cache[player_id] = js_code
    return js_code

2470

2471

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that decrypts signatures shaped like *example_sig*.

    A cached index list from the 'youtube-sigfuncs' cache section is used
    when present. Otherwise the function is parsed out of the player JS and
    its index list is stored in that cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    cached_spec = self.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        return lambda s: ''.join(s[i] for i in cached_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None
    sig_func = self._parse_sig_js(code)

    # Run the function on a string whose characters are their own indices,
    # so the output records where each output character comes from.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in sig_func(probe)]

    self.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func

2492

2493

def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    Does nothing unless the ``youtube_print_sig_code`` parameter is set.
    *func* is run on a probe string as long as *example_sig*, and the
    resulting index permutation is rendered as indexing/slicing of ``s``.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def _slice_expr(first, last, stride):
        # Render the run first..last (inclusive, moving by stride) as a slice
        lower = '' if first == 0 else str(first)
        upper = (':%d' % (last + stride)) if last + stride >= 0 else ':'
        stride_part = '' if stride == 1 else (':%d' % stride)
        return f's[{lower}{upper}{stride_part}]'

    def gen_sig_code(idxs):
        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            delta = i - prev
            if step is not None:
                # Inside a run: keep going while the stride is unchanged
                if delta != step:
                    yield _slice_expr(start, prev, step)
                    step = None
            elif delta in (-1, 1):
                step, start = delta, prev
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _slice_expr(start, i, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

2534

2535

def _parse_sig_js(self, jscode):
    """Find the signature function in the player JS and return a Python wrapper.

    The function name is taken from the ``sig`` group of the first matching
    pattern below. The function is then extracted with JSInterpreter, and
    the returned callable passes its argument to it as a one-element list.
    """
    # Literal parentheses in the JS are matched with \( and \); a `$` inside
    # [a-zA-Z0-9$] is a literal dollar sign allowed in JS identifiers.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])

2558

2559

def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    try:
        # One function is cached per (player URL, signature shape) pair
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(video_id, player_url, s)
        decrypt = self._player_cache[cache_key]
        self._print_sig_code(decrypt, s)
        return decrypt(s)
    except Exception as e:
        # Any failure is re-raised as ExtractorError carrying the traceback
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2571

2572

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    # Decrypted values are cached per input value
    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        # The n function itself is cached per player URL
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypted = self._player_cache[func_key](s)
        self._player_cache[value_key] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2592

2593

def _extract_n_function_name(self, jscode):
    """Return the name of the n-parameter function found in the player JS.

    When the JS refers to the function as ``name[idx]``, the array assigned
    to ``var name`` is parsed and the element at ``idx`` is returned.
    """
    # Literal parentheses in the JS are matched with \( and \)
    nfunc, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return nfunc
    return json.loads(js_to_json(self._search_regex(
        rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2602

2603

def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's n function to a string.

    The function code is read from the 'youtube-nsig' cache section when
    present. Otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def decrypt_n(s):
        # The JS function is rebuilt from its code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return decrypt_n

2620

2621

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ``ytcfg['STS']`` is used when available; otherwise the value is searched
    for in the player JS. Without a player URL, a warning is reported and
    None is returned, or ExtractorError is raised when *fatal* is set.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))

2644

2645

def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URL so the video is marked as watched.

    A warning is reported and nothing is requested when no tracking URL is
    found in *player_responses*. The request itself is non-fatal.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    tracking_url = compat_urlparse.urlunparse(
        url_parts._replace(query=compat_urllib_parse_urlencode(query, True)))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2670

2671

@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed found in *webpage*, as a list of strings.

    Three kinds of embed are collected, in this order: player URLs in
    iframe/embed/object/SWF markup, lazyYT ``data-youtube-id`` values, and
    ``data-video_id`` values of the Wordpress "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    player_embeds = re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)
    entries = [unescapeHTML(mobj.group('url')) for mobj in player_embeds]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(unescapeHTML(lazy_id) for lazy_id in lazy_ids)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the video id is its last group
    entries.extend(groups[-1] for groups in importer_matches)

    return entries

@staticmethod
def _extract_url(webpage):
    """Return the first embed found by ``_extract_urls``, or None if there is none."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)

2707

2708

@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched against *url*.

    Raises ExtractorError when the URL does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2714

2715

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in *data*.

    Start times are read from ``timeRangeStartMillis`` (scaled by 1000) and
    titles from ``title.simpleText`` of each ``chapterRenderer``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters')
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_seconds, chapter_title=title_of, duration=duration)

2729

2730

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists in the engagement panels.

    Returns the chapters of the first list that produces any, or an empty
    list when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2742

2743

def _extract_chapters_from_description(self, description, duration):
    """Build chapters from the lines of *description* that start with a timestamp.

    Chapters are extracted non-strictly, i.e. they are sorted by start time first.
    """
    # Each match is a (timestamp, title) pair
    timestamped_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')
    return self._extract_chapters(
        timestamped_lines,
        chapter_time=lambda pair: parse_duration(pair[0]),
        chapter_title=lambda pair: pair[1],
        duration=duration, strict=False)

2748

2749

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

2754

'title': chapter_title(chapter),

2755

} for chapter in chapter_list or []]

2756

if not strict:

2757

chapter_list.sort(key=lambda c: c['start_time'] or 0)

2758

2759

chapters = [{'start_time': 0, 'title': '<Untitled>'}]

2760

for idx, chapter in enumerate(chapter_list):

2761

if chapter['start_time'] is None or not chapter['title']:

2762

self.report_warning(f'Incomplete chapter {idx}')

2763

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

2764

chapters[-1]['end_time'] = chapter['start_time']

2765

chapters.append(chapter)

2766

else:

2767

self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')

2768

chapters[-1]['end_time'] = duration

2769

return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:]

2770

2771

def _extract_comment(self, comment_renderer, parent=None):

2772

comment_id = comment_renderer.get('commentId')

if not comment_id:

return

text = self._get_text(comment_renderer, 'contentText')

2777

2778

# note: timestamp is an estimate calculated from the current time and time_text

2779

timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')

2780

author = self._get_text(comment_renderer, 'authorText')

2781

author_id = try_get(comment_renderer,

2782

lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

2783

2784

votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],

2785

lambda x: x['likeCount']), compat_str)) or 0

2786

author_thumbnail = try_get(comment_renderer,

2787

lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

2788

2789

author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)

2790

is_favorited = 'creatorHeart' in (try_get(

2791

comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})

return {

'id': comment_id,

'text': text,

'timestamp': timestamp,

2796

'time_text': time_text,

2797

'like_count': votes,

2798

'is_favorited': is_favorited,

2799

'author': author,

2800

'author_id': author_id,

2801

'author_thumbnail': author_thumbnail,

2802

'author_is_uploader': author_is_uploader,

2803

'parent': parent or 'root'

2804

}

2805

2806

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following comment continuations.

    Called without `parent` for the top-level comment section, and
    recursively with `parent` set to a comment id for its replies.
    `tracker` is a dict of counters shared across the recursive calls;
    it is created here when not supplied.
    """

    def config_value(name):
        # First value of the extractor argument, or '' when it is not given
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Return the continuation for the requested sort order, if any."""
        found = None
        for content in contents:
            header = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(header, 'countText', 'commentsCount')
            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')

            # 1 = new, 0 = top
            sort_index = 0 if config_value('comment_sort') == 'top' else 1
            menu_item = try_get(
                header,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][sort_index],
                dict) or {}

            found = (
                self._extract_continuation_ep_data(menu_item.get('serviceEndpoint') or {})
                or self._extract_continuation(menu_item))
            if found:
                sort_text = str_or_none(menu_item.get('title')) or (
                    'top comments' if sort_index == 0 else 'newest first')
                self.to_screen('Sorting comments by %s' % sort_text.lower())
                break
        return found

    def extract_thread(contents):
        """Yield the comments of one page, each followed by its replies."""
        is_top_level = not parent
        if is_top_level:
            tracker['current_page_thread'] = 0
        counter_key = 'total_reply_comments' if parent else 'total_parent_comments'

        for content in contents:
            if is_top_level and tracker['total_parent_comments'] >= max_parents:
                # Falsy sentinel: the consuming loop below stops extraction on it
                yield None
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            replies = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            reply_budget = min(
                max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))
            yield from itertools.islice(replies, reply_budget)

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(config_value('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Up to four limits: total, parents, replies, replies per thread.
    # Missing or unparsable entries mean "no limit". The total is not used here.
    limits = [
        int_or_none(value, default=sys.maxsize)
        for value in self._configuration_arg('max_comments') + [''] * 4]
    _, max_parents, max_replies, max_replies_per_thread = limits[:4]

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    page_num = 0
    while continuation:
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=None if is_forced_continuation else 'onResponseReceivedEndpoints')
        is_forced_continuation = False

        # Stays None (ending the loop) unless a section provides the next page
        continuation = None
        sections = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])
        for section in sections:
            items = traverse_obj(
                section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section only carries the header/sort menu
                is_first_continuation = False
                continuation = extract_header(items)
            else:
                for entry in extract_thread(items):
                    if not entry:
                        return
                    yield entry
                continuation = self._extract_continuation({'contents': items})
            if continuation:
                break
        page_num += 1

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

2951

2952

@staticmethod

2953

def _generate_comment_continuation(video_id):

2954

"""

2955

Generates initial comment section continuation token from given video id

2956

"""

2957

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

2958

return base64.b64encode(token.encode()).decode()

2959

2960

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments of the item section whose
    'sectionIdentifier' is 'comment-item-section'. It is capped at the
    first value of the 'max_comments' extractor argument when that parses
    as an integer.
    """
    def iter_comments():
        section = None
        for candidate in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if candidate.get('sectionIdentifier') == 'comment-item-section':
                section = candidate
                break
        yield from self._comment_entries(section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(), 0, limit)

2970

2971

@staticmethod

2972

def _get_checkok_params():

2973

return {'contentCheckOk': True, 'racyCheckOk': True}

2974

2975

@classmethod

2976

def _generate_player_context(cls, sts=None):

2977

context = {

2978

'html5Preference': 'HTML5_PREF_WANTS',

2979

}

2980

if sts is not None:

2981

context['signatureTimestamp'] = sts

2982

return {

2983

'playbackContext': {

2984

'contentPlaybackContext': context

2985

},

2986

**cls._get_checkok_params()

}

@staticmethod
def _is_agegated(player_response):
    """Tell whether the playability status marks the video as age-gated.

    True when 'desktopLegacyAgeGateReason' is set, or when the status or
    reason contains one of the known age-gate phrases below.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False

3000

3001

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3004

3005

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the 'player' API endpoint for one client.

    The signature timestamp is only looked up when `player_url` is given.
    Returns the response, or None when the response is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB',  # enable stories
        **self._generate_player_context(sts),
    }
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None

3024

3025

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the 'player_client' extractor argument into client names.

    Each requested value may be a client name from INNERTUBE_CLIENTS that
    does not start with '_', 'default' (android + web) or 'all'. Anything
    else is skipped with a warning. With no usable value the default
    clients are used. For music URLs, the '<client>_music' variant of each
    selected client is appended when it exists.

    @returns  The selected client names passed through `orderedSet`
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy, so that extending the result never touches `default`
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Materialized first: extending the list with a generator over itself
        # would also visit the names being appended
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3048

3049

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for the given clients.

    Clients are processed in the given order. Fallback clients may be
    queued while processing, when a response is unplayable or age-gated.
    An ExtractorError from one client does not stop the others; the last
    such error is re-raised only if no response at all was obtained.

    @returns  (list of player responses, player URL or None)
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that pop() in the loop below follows the requested order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client not in INNERTUBE_CLIENTS or actual_client in all_clients:
                continue
            clients.append(client_name)
            all_clients.add(actual_client)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        prs.append({**initial_pr, 'streamingData': None})

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        if not player_url:
            player_url = self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback download is attempted at most once per call
        if require_js_player and not (player_url or tried_iframe_fallback):
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3123

3124

def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Generate format dicts from the streaming data of all player responses.

    First yields the direct ("https") formats listed under 'formats' and
    'adaptiveFormats', de-duplicated per itag + audio track. Then yields
    the formats of each HLS/DASH manifest unless skipped by configuration.

    Missing or null 'quality', 'audioQuality' and audio track 'id' values
    are tolerated instead of raising AttributeError.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null 'audioQuality'
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            # Remembered so manifest formats can reuse the quality (see below)
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        if audio_track.get('audioIsDefault'):
            language_preference = 10
        elif 'descriptive' in (audio_track.get('displayName') or '').lower():
            language_preference = -10
        else:
            language_preference = -1
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality is None when the format reports no quality at all
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign a unique format_id and a quality to a manifest format.

        Returns False when this itag was already seen for the same protocol,
        in which case the format should be dropped.
        """
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Prefer the quality recorded for the same itag, then for the same height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Generate one 'mhtml' storyboard format per level of the storyboard spec.

    The spec is a '|'-separated string: the first field is the URL template
    and each further field describes one level as 8 '#'-separated values.
    Levels that do not match this shape are skipped with a warning.
    Nothing is generated when the spec carries no usable URL.
    """
    spec_string = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    url_template, *spec = spec_string.split('|')
    # Levels are walked last to first; L - i below restores the original index
    spec.reverse()

    base_url = url_or_none(urljoin('https://i.ytimg.com/', url_template or None))
    if not base_url:
        return

    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = [int_or_none(value) for value in args[:5]]
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is a grid of cols x rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        fragments = []
        for j in range(math.ceil(fragment_count)):
            fragments.append({
                'url': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment_duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            })
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': fragments,
        }

3334

3335

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    @returns  (webpage or None, master ytcfg, player responses, player URL)
    """
    if 'webpage' in self._configuration_arg('player_skip'):
        webpage = None
    else:
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    # Fall back to the default config when none can be extracted from the page
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3348

3349

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine live status and extract all formats.

    'isLive' from the video details takes precedence; 'isLiveNow' from the
    live broadcast details is used only when the former is None.

    @returns  (live_broadcast_details, is_live, streaming_data, formats)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = [*self._extract_formats(streaming_data, video_id, player_url, is_live, duration)]

    return live_broadcast_details, is_live, streaming_data, formats

3359

3360

def _real_extract(self, url):

3361

url, smuggled_data = unsmuggle_url(url, {})

3362

video_id = self._match_id(url)

3363

3364

base_url = self.http_scheme() + '//www.youtube.com/'

3365

webpage_url = base_url + 'watch?v=' + video_id

3366

3367

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3368

3369

playability_statuses = traverse_obj(

3370

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3371

3372

trailer_video_id = get_first(

3373

playability_statuses,

3374

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3375

expected_type=str)

3376

if trailer_video_id:

3377

return self.url_result(

3378

trailer_video_id, self.ie_key(), trailer_video_id)

3379

3380

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3381

if webpage else (lambda x: None))

3382

3383

video_details = traverse_obj(

3384

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3385

microformats = traverse_obj(

3386

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3387

expected_type=dict, default=[])

3388

video_title = (

3389

get_first(video_details, 'title')

3390

or self._get_text(microformats, (..., 'title'))

3391

or search_meta(['og:title', 'twitter:title', 'title']))

3392

video_description = get_first(video_details, 'shortDescription')

3393

3394

multifeed_metadata_list = get_first(

3395

player_responses,

3396

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3397

expected_type=str)

3398

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3399

if self.get_param('noplaylist'):

3400

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3405

# Unquote should take place before split on comma (,) since textual

3406

# fields may contain comma as well (see

3407

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3408

feed_data = compat_parse_qs(

3409

compat_urllib_parse_unquote_plus(feed))

3410

3411

def feed_entry(name):

3412

return try_get(

3413

feed_data, lambda x: x[name][0], compat_str)

3414

3415

feed_id = feed_entry('id')

3416

if not feed_id:

3417

continue

3418

feed_title = feed_entry('title')

3419

title = video_title

3420

if feed_title:

3421

title += ' (%s)' % feed_title

3422

entries.append({

3423

'_type': 'url_transparent',

3424

'ie_key': 'Youtube',

3425

'url': smuggle_url(

3426

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3427

{'force_singlefeed': True}),

3428

'title': title,

3429

})

3430

feed_ids.append(feed_id)

3431

self.to_screen(

3432

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3433

% (', '.join(feed_ids), video_id))

3434

return self.playlist_result(

3435

entries, video_id, video_title, video_description)

3436

3437

duration = int_or_none(

3438

get_first(video_details, 'lengthSeconds')

3439

or get_first(microformats, 'lengthSeconds')

3440

or parse_duration(search_meta('duration'))) or None

3441

3442

if get_first(video_details, 'isPostLiveDvr'):

3443

self.write_debug('Video is in Post-Live Manifestless mode')

3444

if duration or 0 > 4 * 3600:

3445

self.report_warning(

3446

'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '

3447

'This is a known issue and patches are welcome')

3448

3449

live_broadcast_details, is_live, streaming_data, formats = self._list_formats(

3450

video_id, microformats, video_details, player_responses, player_url, duration)

3451

3452

if not formats:

3453

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3454

self.report_drm(video_id)

3455

pemr = get_first(

3456

playability_statuses,

3457

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3458

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3459

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3460

if subreason:

3461

if subreason == 'The uploader has not made this video available in your country.':

3462

countries = get_first(microformats, 'availableCountries')

3463

if not countries:

3464

regions_allowed = search_meta('regionsAllowed')

3465

countries = regions_allowed.split(',') if regions_allowed else None

3466

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3467

reason += f'. {subreason}'

3468

if reason:

3469

self.raise_no_formats(reason, expected=True)

3470

3471

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3472

if not keywords and webpage:

3473

keywords = [

3474

unescapeHTML(m.group('content'))

3475

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3476

for keyword in keywords:

3477

if keyword.startswith('yt:stretch='):

3478

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3479

if mobj:

3480

# NB: float is intentional for forcing float division

3481

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3486

f['stretched_ratio'] = ratio

3487

break

3488

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3489

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3490

if thumbnail_url:

3491

thumbnails.append({

3492

'url': thumbnail_url,

3493

})

3494

original_thumbnails = thumbnails.copy()

3495

3496

# The best resolution thumbnails sometimes does not appear in the webpage

3497

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3498

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3499

thumbnail_names = [

3500

# While the *1,*2,*3 thumbnails are just below their correspnding "*default" variants

3501

# in resolution, these are not the custom thumbnail. So de-prioritize them

3502

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3503

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3504

]

3505

n_thumbnail_names = len(thumbnail_names)

3506

thumbnails.extend({

3507

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3508

video_id=video_id, name=name, ext=ext,

3509

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3510

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3511

for thumb in thumbnails:

3512

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3513

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3514

self._remove_duplicate_formats(thumbnails)

3515

self._downloader._sort_thumbnails(original_thumbnails)

3516

3517

category = get_first(microformats, 'category') or search_meta('genre')

3518

channel_id = str_or_none(

3519

get_first(video_details, 'channelId')

3520

or get_first(microformats, 'externalChannelId')

3521

or search_meta('channelId'))

3522

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3523

3524

live_content = get_first(video_details, 'isLiveContent')

3525

is_upcoming = get_first(video_details, 'isUpcoming')

3526

if is_live is None:

3527

if is_upcoming or live_content is False:

3528

is_live = False

3529

if is_upcoming is None and (live_content or is_live):

3530

is_upcoming = False

3531

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3532

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3533

if not duration and live_end_time and live_start_time:

3534

duration = live_end_time - live_start_time

3535

3536

if is_live and self.get_param('live_from_start'):

3537

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3538

3539

formats.extend(self._extract_storyboard(player_responses, duration))

3540

3541

# Source is given priority since formats that throttle are given lower source_preference

3542

# When throttling issue is fully fixed, remove this

3543

self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3548

'formats': formats,

3549

'thumbnails': thumbnails,

3550

# The best thumbnail that we are sure exists. Prevents unnecessary

3551

# URL checking if user don't care about getting the best possible thumbnail

3552

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3553

'description': video_description,

3554

'uploader': get_first(video_details, 'author'),

3555

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3556

'uploader_url': owner_profile_url,

3557

'channel_id': channel_id,

3558

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

3559

'duration': duration,

3560

'view_count': int_or_none(

3561

get_first((video_details, microformats), (..., 'viewCount'))

3562

or search_meta('interactionCount')),

3563

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3564

'age_limit': 18 if (

3565

get_first(microformats, 'isFamilySafe') is False

3566

or search_meta('isFamilyFriendly') == 'false'

3567

or search_meta('og:restrictions:age') == '18+') else 0,

3568

'webpage_url': webpage_url,

3569

'categories': [category] if category else None,

3570

'tags': keywords,

3571

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3572

'is_live': is_live,

3573

'was_live': (False if is_live or is_upcoming or live_content is False

3574

else None if is_live is None or is_upcoming is None

3575

else live_content),

3576

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3577

'release_timestamp': live_start_time,

3578

}

3579

3580

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3581

if pctr:

3582

def get_lang_code(track):

3583

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3584

or track.get('languageCode'))

3585

3586

# Converted into dicts to remove duplicates

3587

captions = {

3588

get_lang_code(sub): sub

3589

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3590

translation_languages = {

3591

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3592

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3593

3594

def process_language(container, base_url, lang_code, sub_name, query):

3595

lang_subs = container.setdefault(lang_code, [])

3596

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

subtitles, automatic_captions = {}, {}

3607

for lang_code, caption_track in captions.items():

3608

base_url = caption_track.get('baseUrl')

3609

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3610

if not base_url:

3611

continue

3612

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3613

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3618

if not caption_track.get('isTranslatable'):

3619

continue

3620

for trans_code, trans_name in translation_languages.items():

3621

if not trans_code:

3622

continue

3623

orig_trans_code = trans_code

3624

if caption_track.get('kind') != 'asr':

3625

if 'translated_subs' in self._configuration_arg('skip'):

3626

continue

3627

trans_code += f'-{lang_code}'

3628

trans_name += format_field(lang_name, None, ' from %s')

3629

# Add an "-orig" label to the original language so that it can be distinguished.

3630

# The subs are returned without "-orig" as well for compatibility

3631

if lang_code == f'a-{orig_trans_code}':

3632

process_language(

3633

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3634

# Setting tlang=lang returns damaged subtitles.

3635

process_language(automatic_captions, base_url, trans_code, trans_name,

3636

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3637

info['automatic_captions'] = automatic_captions

3638

info['subtitles'] = subtitles

3639

3640

parsed_url = compat_urllib_parse_urlparse(url)

3641

for component in [parsed_url.fragment, parsed_url.query]:

3642

query = compat_parse_qs(component)

3643

for k, v in query.items():

3644

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3645

d_k += '_time'

3646

if d_k not in info and k in s_ks:

3647

info[d_k] = parse_duration(query[k][0])

3648

3649

# Youtube Music Auto-generated description

3650

if video_description:

3651

mobj = re.search(

3652

r'''(?xs)

3653

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

3654

(?P<album>[^\n]+)

3655

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

3656

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

3657

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

3658

.+\nAuto-generated\ by\ YouTube\.\s*$

3659

''', video_description)

3660

if mobj:

3661

release_year = mobj.group('release_year')

3662

release_date = mobj.group('release_date')

3663

if release_date:

3664

release_date = release_date.replace('-', '')

3665

if not release_year:

3666

release_year = release_date[:4]

3667

info.update({

3668

'album': mobj.group('album'.strip()),

3669

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3670

'track': mobj.group('track').strip(),

3671

'release_date': release_date,

3672

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

3678

if not initial_data:

3679

query = {'videoId': video_id}

3680

query.update(self._get_checkok_params())

3681

initial_data = self._extract_response(

3682

item_id=video_id, ep='next', fatal=False,

3683

ytcfg=master_ytcfg, query=query,

3684

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3685

note='Downloading initial data API JSON')

3686

3687

info['comment_count'] = traverse_obj(initial_data, (

3688

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

3689

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

3690

), (

3691

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

3692

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

3693

), expected_type=int_or_none, get_all=False)

3694

3695

try: # This will error if there is no livechat

3696

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3697

except (KeyError, IndexError, TypeError):

3698

pass

3699

else:

3700

info.setdefault('subtitles', {})['live_chat'] = [{

3701

# url is needed to set cookies

3702

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

3703

'video_id': video_id,

3704

'ext': 'json',

3705

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3711

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3712

or self._extract_chapters_from_description(video_description, duration)

3713

or None)

3714

3715

contents = traverse_obj(

3716

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3717

expected_type=list, default=[])

3718

3719

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3720

if vpir:

3721

stl = vpir.get('superTitleLink')

3722

if stl:

3723

stl = self._get_text(stl)

3724

if try_get(

3725

vpir,

3726

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3727

info['location'] = stl

3728

else:

3729

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

3730

if mobj:

3731

info.update({

3732

'series': mobj.group(1),

3733

'season_number': int(mobj.group(2)),

3734

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3739

list) or []):

3740

tbr = tlb.get('toggleButtonRenderer') or {}

3741

for getter, regex in [(

3742

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3743

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3744

lambda x: x['accessibility'],

3745

lambda x: x['accessibilityData']['accessibilityData'],

3746

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3747

label = (try_get(tbr, getter, dict) or {}).get('label')

3748

if label:

3749

mobj = re.match(regex, label)

3750

if mobj:

3751

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3752

break

3753

sbr_tooltip = try_get(

3754

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3755

if sbr_tooltip:

3756

like_count, dislike_count = sbr_tooltip.split(' / ')

3757

info.update({

3758

'like_count': str_to_int(like_count),

3759

'dislike_count': str_to_int(dislike_count),

3760

})

3761

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3762

if vsir:

3763

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3764

info.update({

3765

'channel': self._get_text(vor, 'title'),

3766

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3771

list) or []

3772

multiple_songs = False

3773

for row in rows:

3774

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3775

multiple_songs = True

3776

break

3777

for row in rows:

3778

mrr = row.get('metadataRowRenderer') or {}

3779

mrr_title = mrr.get('title')

3780

if not mrr_title:

3781

continue

3782

mrr_title = self._get_text(mrr, 'title')

3783

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3784

if mrr_title == 'License':

3785

info['license'] = mrr_contents_text

3786

elif not multiple_songs:

3787

if mrr_title == 'Album':

3788

info['album'] = mrr_contents_text

3789

elif mrr_title == 'Artist':

3790

info['artist'] = mrr_contents_text

3791

elif mrr_title == 'Song':

3792

info['track'] = mrr_contents_text

3793

3794

fallbacks = {

3795

'channel': 'uploader',

3796

'channel_id': 'uploader_id',

3797

'channel_url': 'uploader_url',

3798

}

3799

3800

# The upload date for scheduled, live and past live streams / premieres in microformats

3801

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3802

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3803

upload_date = (

3804

unified_strdate(get_first(microformats, 'uploadDate'))

3805

or unified_strdate(search_meta('uploadDate')))

3806

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3807

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date

3808

info['upload_date'] = upload_date

3809

3810

for to, frm in fallbacks.items():

3811

if not info.get(to):

3812

info[to] = info.get(frm)

3813

3814

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3820

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3821

is_membersonly = None

3822

is_premium = None

3823

if initial_data and is_private is not None:

3824

is_membersonly = False

3825

is_premium = False

3826

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3827

badge_labels = set()

3828

for content in contents:

3829

if not isinstance(content, dict):

3830

continue

3831

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3832

for badge_label in badge_labels:

3833

if badge_label.lower() == 'members only':

3834

is_membersonly = True

3835

elif badge_label.lower() == 'premium':

3836

is_premium = True

3837

elif badge_label.lower() == 'unlisted':

3838

is_unlisted = True

3839

3840

info['availability'] = self._availability(

3841

is_private=is_private,

3842

needs_premium=is_premium,

3843

needs_subscription=is_membersonly,

3844

needs_auth=info['age_limit'] >= 18,

3845

is_unlisted=None if is_private is None else is_unlisted)

3846

3847

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3848

3849

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3855

3856

@staticmethod

3857

def passthrough_smuggled_data(func):
    """Decorate an extraction method so smuggled URL data is handed to it and to its entries.

    The wrapped callable is invoked as ``func(self, url, smuggled_data)`` with the
    un-smuggled URL. When there is any smuggled data and the returned info dict
    has entries, each entry's URL is re-smuggled with the same data.
    """
    def _with_smuggled_urls(items, extra):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in items:
            item['url'] = smuggle_url(item['url'], extra)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        # Nothing to propagate when no data was smuggled or there are no entries
        if not smuggled_data or not result.get('entries'):
            return result
        result['entries'] = _with_smuggled_urls(result['entries'], smuggled_data)
        return result

    return wrapper

def _extract_channel_id(self, webpage):
    """Return the channel ID found in the meta tags of `webpage`.

    The `channelId` meta tag is tried first. If it is absent, a channel URL is
    read from one of several URL-bearing meta tags and the ID is parsed from
    its `/channel/<id>` path component.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must be inside the capture group: a repeated group only
    # keeps its last repetition, which would return a single character.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')

3888

3889

@staticmethod

3890

def _extract_basic_item_renderer(item):

3891

# Modified from _extract_grid_item_renderer

3892

known_basic_renderers = (

3893

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

3894

)

3895

for key, renderer in item.items():

3896

if not isinstance(renderer, dict):

3897

continue

3898

elif key in known_basic_renderers:

3899

return renderer

3900

elif key.startswith('grid') and key.endswith('Renderer'):

3901

return renderer

3902

3903

def _grid_entries(self, grid_renderer):

3904

for item in grid_renderer['items']:

3905

if not isinstance(item, dict):

3906

continue

3907

renderer = self._extract_basic_item_renderer(item)

3908

if not isinstance(renderer, dict):

3909

continue

3910

title = self._get_text(renderer, 'title')

3911

3912

# playlist

3913

playlist_id = renderer.get('playlistId')

3914

if playlist_id:

3915

yield self.url_result(

3916

'https://www.youtube.com/playlist?list=%s' % playlist_id,

3917

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

3922

if video_id:

3923

yield self._extract_video(renderer)

3924

continue

3925

# channel

3926

channel_id = renderer.get('channelId')

3927

if channel_id:

3928

yield self.url_result(

3929

'https://www.youtube.com/channel/%s' % channel_id,

3930

ie=YoutubeTabIE.ie_key(), video_title=title)

3931

continue

3932

# generic endpoint URL support

3933

ep_url = urljoin('https://www.youtube.com/', try_get(

3934

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

3935

compat_str))

3936

if ep_url:

3937

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

3938

if ie.suitable(ep_url):

3939

yield self.url_result(

3940

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

3941

break

3942

3943

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for a music list item renderer.

    Tried in order: the item's own video ID, the watch endpoint's playlist
    (optionally together with its video ID), then the browse endpoint ID.
    Returns None when none of these is present.
    """
    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')

    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, watch_endpoint + ('playlistId',))
    if playlist_id:
        video_id = traverse_obj(renderer, watch_endpoint + ('videoId',))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)

3960

3961

def _shelf_entries_from_content(self, shelf_renderer):

3962

content = shelf_renderer.get('content')

3963

if not isinstance(content, dict):

3964

return

3965

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3966

if renderer:

3967

# TODO: add support for nested playlists so each shelf is processed

3968

# as separate playlist

3969

# TODO: this includes only first N items

3970

yield from self._grid_entries(renderer)

3971

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's own page (if any), then its inline entries.

    With `skip_channels` set, a shelf whose URL contains '/channels?' yields
    nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3991

3992

def _playlist_entries(self, video_list_renderer):

3993

for content in video_list_renderer['contents']:

3994

if not isinstance(content, dict):

3995

continue

3996

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

3997

if not isinstance(renderer, dict):

3998

continue

3999

video_id = renderer.get('videoId')

4000

if not video_id:

4001

continue

4002

yield self._extract_video(renderer)

4003

4004

def _rich_entries(self, rich_grid_renderer):
    """Yield the video nested under ``content.videoRenderer``, if it has a video ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4011

4012

def _video_entry(self, video_renderer):

4013

video_id = video_renderer.get('videoId')

4014

if video_id:

4015

return self._extract_video(video_renderer)

4016

4017

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab URL result for a hashtag tile, or None if it has no usable URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4023

4024

def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    Emitted in order: the attached video, the attached playlist, and every
    video linked inline in the post text, except a link to the attached video.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    attached_video = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not (ep_url and YoutubeIE.suitable(ep_url)):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video has already been handled above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)

4059

4060

def _post_thread_continuation_entries(self, post_thread_continuation):

4061

contents = post_thread_continuation.get('contents')

4062

if not isinstance(contents, list):

4063

return

4064

for content in contents:

4065

renderer = content.get('backstagePostThreadRenderer')

4066

if isinstance(renderer, dict):

4067

yield from self._post_thread_entries(renderer)

4068

continue

4069

renderer = content.get('videoRenderer')

4070

if isinstance(renderer, dict):

4071

yield self._video_entry(renderer)

4072

4073

r''' # unused

4074

def _rich_grid_entries(self, contents):

4075

for content in contents:

4076

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4077

if video_renderer:

4078

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    `continuation_list` is a one-element list used as an out-parameter: it is
    reset in place and its single slot is updated with the continuation found
    while walking the renderers.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    entry_extractors = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            for key, renderer in section_item.items():
                if key not in entry_extractors:
                    continue
                # Extractors may produce empty results; drop those
                yield from filter(None, entry_extractors[key](renderer))
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of each item is processed
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4131

4132

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of a tab, following continuations page by page.

    The tab's own content is processed first. After that, each continuation
    response is handled either through its ``continuationContents`` or through
    the items of its first ``appendContinuationItemsAction``. Paging stops when
    there is no continuation, no response, or nothing recognisable in a response.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Reads the current binding of continuation_list, which is replaced below
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for the values of response['continuationContents']
    continuation_extractors = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for appended continuation items: (extractor, key the items are wrapped under)
    item_extractors = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key in continuation_extractors:
                continuation_renderer = value
                continuation_list = [None]
                yield from continuation_extractors[key](continuation_renderer)
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The first item decides which extractor handles the whole list
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key in first_item:
            if key in item_extractors:
                extractor, container_key = item_extractors[key]
                video_items_renderer = {container_key: continuation_items}
                continuation_list = [None]
                yield from extractor(video_items_renderer)
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
        if not video_items_renderer:
            break

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4209

for tab in tabs:

4210

renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}

4211

if renderer.get('selected') is True:

return renderer

else:

if fatal:

raise ExtractorError('Unable to find selected tab')

4216

4217

def _extract_uploader(self, data):
    """Return uploader metadata taken from the playlist's secondary sidebar.

    The result holds whichever of `uploader`, `uploader_id` and `uploader_url`
    are not None; it is empty when the sidebar has no video owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = (
        # Falls back to the full owner text when it is not of the "by X and N others" form
        ('uploader', self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text)),
        ('uploader_id', try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)),
        ('uploader_url', urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))),
    )
    return {name: value for name, value in fields if value is not None}

4232

4233

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build a playlist result for the selected tab of a channel or playlist page.

    Metadata comes from the channel metadata renderer when present, otherwise
    from the playlist metadata renderer, together with the primary sidebar and
    the page header. The entries are produced lazily by `_entries`.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _append_uncropped(thumbnails, thumbnail_id, preference):
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    # The channel ID doubles as the playlist ID; otherwise use the requested item ID
    playlist_id = item_id if channel_id is None else channel_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # Mirror the uploader fields into the channel fields
    for channel_key, uploader_key in (
            ('channel', 'uploader'), ('channel_id', 'uploader_id'), ('channel_url', 'uploader_url')):
        metadata[channel_key] = metadata[uploader_key]

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4324

4325

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of a playlist embedded in a watch page, page by page.

    Each page is the `playlist` renderer of the previous response. Videos up to
    and including the last one already yielded are skipped, so overlapping
    pages do not produce duplicates. Extraction ends when a page has no
    videos, has no new videos, or when the next page cannot be found in the
    response.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item may lack a watch endpoint (e.g. it is not a
        # playlistPanelVideoRenderer); fall back to the defaults below
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No further page available; _playlist_entries cannot subscript a missing renderer
            return

4355

4356

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist renderer found in a response.

    If the renderer links to a playlist page that differs from `url` and the
    playlist ID is not a known-unviewable one, a url_result for YoutubeTabIE
    pointing at that page is returned. Otherwise the entries are extracted
    inline and returned as a playlist_result.

    @param item_id: fallback playlist ID when the renderer has no 'playlistId'
    @param url: URL being extracted; base for resolving the playlist link
    @param data: response; fallback source for the title
    @param playlist: playlist renderer
    @param ytcfg: forwarded to the inline playlist extraction
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    web_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    tab_url = urljoin(url, web_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    delegate = tab_url and tab_url != url and not known_unviewable
    if not delegate:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(
            entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4378

4379

def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its sidebar.

    Labels are collected from the badges of the primary sidebar info renderer
    and, if present, from the selected entry of the privacy dropdown. Only the
    labels 'public', 'private' and 'unlisted' affect the result; when none of
    them is found, both flags stay None (i.e. public is not assumed).

    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_items = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_items:
        selected = try_get(
            entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool)
        if selected:
            text = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
            if text:
                labels.add(text.lower())
                break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)

4410

4411

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value among the playlist sidebar items.

    @param data: response containing 'sidebar' -> 'playlistSidebarRenderer' -> 'items'
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type passed to try_get for the looked-up value
    @returns the matching renderer, or None if no item provides one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with unavailable videos included.

    The browse endpoint of the 'show unavailable videos' menu item supplies the
    'browseId' and 'params' of the request; values that are missing (or the
    whole item being absent) fall back to 'VL' + item_id and 'wgYCCAA='.
    Returns None without any request if the primary sidebar info renderer
    cannot be found. The request itself is non-fatal.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_renderer = entry.get('menuNavigationItemRenderer')
        label = try_get(
            nav_renderer, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': endpoint.get('params') or 'wgYCCAA=',
        'browseId': endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4455

4456

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped

4459

4460

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying when needed.

    An attempt is repeated (up to the 'extractor_retries' param, default 3)
    when the download fails with a network error other than HTTP 403/429, or
    when dict_get finds none of 'contents', 'currentVideoEndpoint' and
    'onResponseReceivedActions' in the extracted data.

    @param url: URL to download
    @param item_id: ID used when downloading and extracting
    @param fatal: re-raise/raise errors if True, otherwise report a warning
    @returns (webpage, data) from the last attempt
    """
    max_retries = self.get_param('extractor_retries', 3)
    attempt = -1
    webpage = data = last_error = None
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            webpage = self._download_webpage(
                url, item_id, note='Downloading webpage%s' % (retry_suffix,))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Network errors are retried, except HTTP 403 and 429
            retriable = isinstance(cause, network_exceptions) and not (
                isinstance(cause, compat_HTTPError) and cause.code in (403, 429))
            if retriable:
                last_error = error_to_compat_str(cause or e.msg)
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Only reached when the download and extraction above succeeded
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt >= max_retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when ytcfg is available or the session is not authenticated.
    Otherwise raises an expected ExtractorError if `fatal` is set and
    'authcheck' is not in the 'skip' extractor argument; else warns once.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if 'authcheck' not in skipped and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

4517

4518

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the initial data for a URL, from the webpage or else from the API.

    Unless the webpage is skipped, it is downloaded first and ytcfg is taken
    from it when not supplied. If that leaves no data, the auth check is
    reported and the data is requested through self._extract_tab_endpoint.

    @param webpage_fatal: `fatal` value used for the webpage download only
    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        current_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and current_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4537

4538

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the API and fetch the response of the resolved endpoint.

    The 'navigation/resolve_url' result is checked for a 'browseEndpoint'
    (requested via 'browse') and then a 'watchEndpoint' (requested via 'next');
    the first one found is queried and its response returned. If neither is
    present, an expected ExtractorError is raised when `fatal`, otherwise a
    warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)

4555

4556

# Default 'params' value of search requests: _search_results uses it when
# called without explicit params, and leaves the field out when it is falsy
_SEARCH_PARAMS = None

4557

4558

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, following continuations page by page.

    @param query: search query, sent as 'query'
    @param params: sent as 'params' if truthy; defaults to self._SEARCH_PARAMS
    @param default_client: client used for ytcfg, headers and requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Paths at which the result items may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    # Distinct top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})
    display_id = f'query "{query}"'
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-element list handed to _extract_entries; its first element is
    # merged into the next request and, when falsy afterwards, ends the search
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        data.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        page_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4593

IE_DESC = 'YouTube Tabs'

4594

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4603

(?P<not_channel>

4604

feed/|hashtag/|

4605

(?:playlist|watch)\?.*?\blist=

4606

)|

4607

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4612

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4613

}

4614

IE_NAME = 'youtube:tab'

4615

4616

_TESTS = [{

4617

'note': 'playlists, multipage',

4618

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4619

'playlist_mincount': 94,

4620

'info_dict': {

4621

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4622

'title': 'Igor Kleiner - Playlists',

4623

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4624

'uploader': 'Igor Kleiner',

4625

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4626

'channel': 'Igor Kleiner',

4627

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4628

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4629

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4630

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4631

'channel_follower_count': int

4632

},

4633

}, {

4634

'note': 'playlists, multipage, different order',

4635

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4636

'playlist_mincount': 94,

4637

'info_dict': {

4638

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4639

'title': 'Igor Kleiner - Playlists',

4640

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4641

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4642

'uploader': 'Igor Kleiner',

4643

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4644

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4645

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4646

'channel': 'Igor Kleiner',

4647

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4648

'channel_follower_count': int

4649

},

4650

}, {

4651

'note': 'playlists, series',

4652

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4653

'playlist_mincount': 5,

4654

'info_dict': {

4655

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4656

'title': '3Blue1Brown - Playlists',

4657

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4658

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4659

'uploader': '3Blue1Brown',

4660

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4661

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4662

'channel': '3Blue1Brown',

4663

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4664

'tags': ['Mathematics'],

4665

'channel_follower_count': int

4666

},

4667

}, {

4668

'note': 'playlists, singlepage',

4669

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4670

'playlist_mincount': 4,

4671

'info_dict': {

4672

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4673

'title': 'ThirstForScience - Playlists',

4674

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4675

'uploader': 'ThirstForScience',

4676

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4677

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4678

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4679

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4680

'tags': 'count:13',

4681

'channel': 'ThirstForScience',

4682

'channel_follower_count': int

4683

}

4684

}, {

4685

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4686

'only_matching': True,

4687

}, {

4688

'note': 'basic, single video playlist',

4689

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4690

'info_dict': {

4691

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4692

'uploader': 'Sergey M.',

4693

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4694

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4699

'channel': 'Sergey M.',

4700

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4701

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4702

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4707

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4708

'info_dict': {

4709

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4710

'uploader': 'Sergey M.',

4711

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4712

'title': 'youtube-dl empty playlist',

4713

'tags': [],

4714

'channel': 'Sergey M.',

4715

'description': '',

4716

'modified_date': '20160902',

4717

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4718

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4719

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4725

'info_dict': {

4726

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4727

'title': 'lex will - Home',

4728

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4729

'uploader': 'lex will',

4730

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4731

'channel': 'lex will',

4732

'tags': ['bible', 'history', 'prophesy'],

4733

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4734

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4735

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4736

'channel_follower_count': int

4737

},

4738

'playlist_mincount': 2,

4739

}, {

4740

'note': 'Videos tab',

4741

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4742

'info_dict': {

4743

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4744

'title': 'lex will - Videos',

4745

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4746

'uploader': 'lex will',

4747

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4748

'tags': ['bible', 'history', 'prophesy'],

4749

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4750

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4751

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4752

'channel': 'lex will',

4753

'channel_follower_count': int

4754

},

4755

'playlist_mincount': 975,

4756

}, {

4757

'note': 'Videos tab, sorted by popular',

4758

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4759

'info_dict': {

4760

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4761

'title': 'lex will - Videos',

4762

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4763

'uploader': 'lex will',

4764

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4765

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4766

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4767

'channel': 'lex will',

4768

'tags': ['bible', 'history', 'prophesy'],

4769

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4770

'channel_follower_count': int

4771

},

4772

'playlist_mincount': 199,

4773

}, {

4774

'note': 'Playlists tab',

4775

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4776

'info_dict': {

4777

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4778

'title': 'lex will - Playlists',

4779

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4780

'uploader': 'lex will',

4781

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4782

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4783

'channel': 'lex will',

4784

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4785

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4786

'tags': ['bible', 'history', 'prophesy'],

4787

'channel_follower_count': int

4788

},

4789

'playlist_mincount': 17,

4790

}, {

4791

'note': 'Community tab',

4792

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4793

'info_dict': {

4794

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4795

'title': 'lex will - Community',

4796

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4797

'uploader': 'lex will',

4798

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4799

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4800

'channel': 'lex will',

4801

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4802

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4803

'tags': ['bible', 'history', 'prophesy'],

4804

'channel_follower_count': int

4805

},

4806

'playlist_mincount': 18,

4807

}, {

4808

'note': 'Channels tab',

4809

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4810

'info_dict': {

4811

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4812

'title': 'lex will - Channels',

4813

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4814

'uploader': 'lex will',

4815

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4816

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4817

'channel': 'lex will',

4818

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4819

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4820

'tags': ['bible', 'history', 'prophesy'],

4821

'channel_follower_count': int

4822

},

4823

'playlist_mincount': 12,

4824

}, {

4825

'note': 'Search tab',

4826

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4827

'playlist_mincount': 40,

4828

'info_dict': {

4829

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4830

'title': '3Blue1Brown - Search - linear algebra',

4831

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4832

'uploader': '3Blue1Brown',

4833

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4834

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4835

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4836

'tags': ['Mathematics'],

4837

'channel': '3Blue1Brown',

4838

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4839

'channel_follower_count': int

4840

},

4841

}, {

4842

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4843

'only_matching': True,

4844

}, {

4845

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4846

'only_matching': True,

4847

}, {

4848

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4849

'only_matching': True,

4850

}, {

4851

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4852

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4853

'info_dict': {

4854

'title': '29C3: Not my department',

4855

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4856

'uploader': 'Christiaan008',

4857

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4858

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4859

'tags': [],

4860

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4861

'view_count': int,

4862

'modified_date': '20150605',

4863

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4864

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4865

'channel': 'Christiaan008',

4866

},

4867

'playlist_count': 96,

4868

}, {

4869

'note': 'Large playlist',

4870

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4871

'info_dict': {

4872

'title': 'Uploads from Cauchemar',

4873

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4874

'uploader': 'Cauchemar',

4875

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4876

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4877

'tags': [],

4878

'modified_date': r're:\d{8}',

4879

'channel': 'Cauchemar',

4880

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4881

'view_count': int,

4882

'description': '',

4883

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4884

},

4885

'playlist_mincount': 1123,

4886

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4887

}, {

4888

'note': 'even larger playlist, 8832 videos',

4889

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4890

'only_matching': True,

4891

}, {

4892

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4893

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4894

'info_dict': {

4895

'title': 'Uploads from Interstellar Movie',

4896

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4897

'uploader': 'Interstellar Movie',

4898

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4899

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4900

'tags': [],

4901

'view_count': int,

4902

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4903

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4904

'channel': 'Interstellar Movie',

4905

'description': '',

4906

'modified_date': r're:\d{8}',

4907

},

4908

'playlist_mincount': 21,

4909

}, {

4910

'note': 'Playlist with "show unavailable videos" button',

4911

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4912

'info_dict': {

4913

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4914

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4915

'uploader': 'Phim Siêu Nhân Nhật Bản',

4916

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4917

'view_count': int,

4918

'channel': 'Phim Siêu Nhân Nhật Bản',

4919

'tags': [],

4920

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4921

'description': '',

4922

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4923

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4924

'modified_date': r're:\d{8}',

4925

},

4926

'playlist_mincount': 200,

4927

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4928

}, {

4929

'note': 'Playlist with unavailable videos in page 7',

4930

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4931

'info_dict': {

4932

'title': 'Uploads from BlankTV',

4933

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4934

'uploader': 'BlankTV',

4935

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4936

'channel': 'BlankTV',

4937

'channel_url': 'https://www.youtube.com/c/blanktv',

4938

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4939

'view_count': int,

4940

'tags': [],

4941

'uploader_url': 'https://www.youtube.com/c/blanktv',

4942

'modified_date': r're:\d{8}',

4943

'description': '',

4944

},

4945

'playlist_mincount': 1000,

4946

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4947

}, {

4948

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4949

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4950

'info_dict': {

4951

'title': 'Data Analysis with Dr Mike Pound',

4952

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4953

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4954

'uploader': 'Computerphile',

4955

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4956

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4957

'tags': [],

4958

'view_count': int,

4959

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4960

'channel_url': 'https://www.youtube.com/user/Computerphile',

4961

'channel': 'Computerphile',

4962

},

4963

'playlist_mincount': 11,

4964

}, {

4965

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4966

'only_matching': True,

4967

}, {

4968

'note': 'Playlist URL that does not actually serve a playlist',

4969

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4974

'uploader': 'STREEM',

4975

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4976

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4977

'upload_date': '20150526',

4978

'license': 'Standard YouTube License',

4979

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4980

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4987

},

4988

'skip': 'This video is not available.',

4989

'add_ie': [YoutubeIE.ie_key()],

4990

}, {

4991

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4992

'only_matching': True,

4993

}, {

4994

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

4995

'only_matching': True,

4996

}, {

4997

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

4998

'info_dict': {

4999

'id': 'GgL890LIznQ', # This will keep changing

5000

'ext': 'mp4',

5001

'title': str,

5002

'uploader': 'Sky News',

5003

'uploader_id': 'skynews',

5004

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5005

'upload_date': r're:\d{8}',

5006

'description': str,

5007

'categories': ['News & Politics'],

5008

'tags': list,

5009

'like_count': int,

5010

'release_timestamp': 1642502819,

5011

'channel': 'Sky News',

5012

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5013

'age_limit': 0,

5014

'view_count': int,

5015

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

5016

'playable_in_embed': True,

5017

'release_date': '20220118',

5018

'availability': 'public',

5019

'live_status': 'is_live',

5020

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5021

'channel_follower_count': int

5022

},

5023

'params': {

5024

'skip_download': True,

5025

},

5026

'expected_warnings': ['Ignoring subtitle tracks found in '],

5027

}, {

5028

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5033

'uploader': 'The Young Turks',

5034

'uploader_id': 'TheYoungTurks',

5035

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5036

'upload_date': '20150715',

5037

'license': 'Standard YouTube License',

5038

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5039

'categories': ['News & Politics'],

5040

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5045

},

5046

'only_matching': True,

5047

}, {

5048

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5049

'only_matching': True,

5050

}, {

5051

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5052

'only_matching': True,

5053

}, {

5054

'note': 'A channel that is not live. Should raise error',

5055

'url': 'https://www.youtube.com/user/numberphile/live',

5056

'only_matching': True,

5057

}, {

5058

'url': 'https://www.youtube.com/feed/trending',

5059

'only_matching': True,

5060

}, {

5061

'url': 'https://www.youtube.com/feed/library',

5062

'only_matching': True,

5063

}, {

5064

'url': 'https://www.youtube.com/feed/history',

5065

'only_matching': True,

5066

}, {

5067

'url': 'https://www.youtube.com/feed/subscriptions',

5068

'only_matching': True,

5069

}, {

5070

'url': 'https://www.youtube.com/feed/watch_later',

5071

'only_matching': True,

5072

}, {

5073

'note': 'Recommended - redirects to home page.',

5074

'url': 'https://www.youtube.com/feed/recommended',

5075

'only_matching': True,

5076

}, {

5077

'note': 'inline playlist with not always working continuations',

5078

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5079

'only_matching': True,

5080

}, {

5081

'url': 'https://www.youtube.com/course',

5082

'only_matching': True,

5083

}, {

5084

'url': 'https://www.youtube.com/zsecurity',

5085

'only_matching': True,

5086

}, {

5087

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5088

'only_matching': True,

5089

}, {

5090

'url': 'https://www.youtube.com/TheYoungTurks/live',

5091

'only_matching': True,

5092

}, {

5093

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5100

}, {

5101

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5102

'only_matching': True,

5103

}, {

5104

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5105

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5106

'only_matching': True

5107

}, {

5108

'note': '/browse/ should redirect to /channel/',

5109

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5110

'only_matching': True

5111

}, {

5112

'note': 'VLPL, should redirect to playlist?list=PL...',

5113

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5114

'info_dict': {

5115

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5116

'uploader': 'NoCopyrightSounds',

5117

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5118

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5119

'title': 'NCS Releases',

5120

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5121

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5122

'modified_date': r're:\d{8}',

5123

'view_count': int,

5124

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5125

'tags': [],

5126

'channel': 'NoCopyrightSounds',

5127

},

5128

'playlist_mincount': 166,

5129

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5130

}, {

5131

'note': 'Topic, should redirect to playlist?list=UU...',

5132

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5133

'info_dict': {

5134

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5135

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5136

'title': 'Uploads from Royalty Free Music - Topic',

5137

'uploader': 'Royalty Free Music - Topic',

5138

'tags': [],

5139

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5140

'channel': 'Royalty Free Music - Topic',

5141

'view_count': int,

5142

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5143

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5144

'modified_date': r're:\d{8}',

5145

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5146

'description': '',

5147

},

5148

'expected_warnings': [

5149

'The URL does not have a videos tab',

5150

r'[Uu]navailable videos (are|will be) hidden',

5151

],

5152

'playlist_mincount': 101,

5153

}, {

5154

'note': 'Topic without a UU playlist',

5155

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5156

'info_dict': {

5157

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5158

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5159

'tags': [],

5160

},

5161

'expected_warnings': [

5162

'the playlist redirect gave error',

5163

],

5164

'playlist_mincount': 9,

5165

}, {

5166

'note': 'Youtube music Album',

5167

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5168

'info_dict': {

5169

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5170

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5175

'modified_date': r're:\d{8}',

5176

},

5177

'playlist_count': 50,

5178

}, {

5179

'note': 'unlisted single video playlist',

5180

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5181

'info_dict': {

5182

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5183

'uploader': 'colethedj',

5184

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5185

'title': 'yt-dlp unlisted playlist test',

5186

'availability': 'unlisted',

5187

'tags': [],

5188

'modified_date': '20211208',

5189

'channel': 'colethedj',

5190

'view_count': int,

5191

'description': '',

5192

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5193

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5194

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5199

'url': 'https://www.youtube.com/feed/recommended',

5200

'info_dict': {

5201

'id': 'recommended',

5202

'title': 'recommended',

5203

'tags': [],

5204

},

5205

'playlist_mincount': 50,

5206

'params': {

5207

'skip_download': True,

5208

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5209

},

5210

}, {

5211

'note': 'API Fallback: /videos tab, sorted by oldest first',

5212

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5213

'info_dict': {

5214

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5215

'title': 'Cody\'sLab - Videos',

5216

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5217

'uploader': 'Cody\'sLab',

5218

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5219

'channel': 'Cody\'sLab',

5220

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5221

'tags': [],

5222

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5223

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5224

'channel_follower_count': int

5225

},

5226

'playlist_mincount': 650,

5227

'params': {

5228

'skip_download': True,

5229

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5230

},

5231

}, {

5232

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5233

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5234

'info_dict': {

5235

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5236

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5237

'title': 'Uploads from Royalty Free Music - Topic',

5238

'uploader': 'Royalty Free Music - Topic',

5239

'modified_date': r're:\d{8}',

5240

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5241

'description': '',

5242

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5243

'tags': [],

5244

'channel': 'Royalty Free Music - Topic',

5245

'view_count': int,

5246

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5247

},

5248

'expected_warnings': [

5249

'does not have a videos tab',

5250

r'[Uu]navailable videos (are|will be) hidden',

5251

],

5252

'playlist_mincount': 101,

5253

'params': {

5254

'skip_download': True,

5255

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5256

},

5257

}, {

5258

'note': 'non-standard redirect to regional channel',

5259

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5260

'only_matching': True

5261

}, {

5262

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5263

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5264

'info_dict': {

5265

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5266

'modified_date': '20220407',

5267

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5268

'tags': [],

5269

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5270

'uploader': 'pukkandan',

5271

'availability': 'unlisted',

5272

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5273

'channel': 'pukkandan',

5274

'description': 'Test for collaborative playlist',

5275

'title': 'yt-dlp test - collaborative playlist',

5276

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5277

},

5278

'playlist_mincount': 2

}]

@classmethod
def suitable(cls, url):
    """Return False for any URL that YoutubeIE accepts; otherwise use the inherited check."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5284

5285

# Splits a URL into three named parts used by _real_extract:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional "/word" path component; the conditional group only tries to
#          match it when the `not_channel` group did not participate in the match
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5286

5287

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a tab, playlist or channel URL.

    The URL is split with _URL_RE, rewritten where needed (music URLs, channel
    home pages, watch URLs without a video id), then downloaded. Depending on
    the response this returns the tab entries, the inline playlist entries, or
    a url_result pointing at a single video or a redirect target.

    Raises ExtractorError when the page cannot be recognized, when a requested
    tab does not exist, or when the live tab did not redirect to a video.
    """
    item_id = self._match_id(url)
    # All further requests use the www.youtube.com host, whatever host was given
    parsed = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def split_url(target):
        # Named groups of _URL_RE, with groups that did not match mapped to ''
        groups = self._URL_RE.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    parts = split_url(url)
    redirect_warning = None
    pre = parts['pre']
    # Youtube returns incomplete data if tabname is not lower case
    tab = parts['tab'].lower()
    post = parts['post']
    is_channel = not parts['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab, post, is_channel = '', '', False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            canonical_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not canonical_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(canonical_url, ie=YoutubeTabIE.ie_key())
        elif parts['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = pre + tab + post
    parts = split_url(url)

    # Handle both video/playlist URLs
    query = parse_qs(url)
    video_id = query.get('v', [None])[0]
    playlist_id = query.get('list', [None])[0]

    if not video_id and parts['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        parts = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_path = (
        'onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
        'commandMetadata', 'webCommandMetadata', 'url')
    redirect_url = traverse_obj(data, redirect_path, get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and trailing part on the redirect target
        redirect_url = urljoin('https://www.youtube.com', redirect_url) + parts['tab'] + parts['post']
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = parts['tab'][1:]
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The user asked for this tab explicitly, so there is nothing to fall back to
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    uploads_id = f'UU{item_id[2:]}'
                    uploads_url = f'https://www.youtube.com/playlist?list={uploads_id}'
                    try:
                        data, ytcfg = self._extract_data(
                            uploads_url, uploads_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = uploads_id, uploads_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {uploads_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    endpoint_video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
    video_id = endpoint_video_id or video_id
    if video_id:
        if parts['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')

5416

5417

5418

class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist ids and playlist URLs, and hand them to YoutubeTabIE."""

    IE_DESC = 'YouTube playlists'
    # Verbose regex: an optional "<host>/...?...list=" prefix followed by the playlist id
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that YoutubeTabIE accepts or that carry a non-empty `v` query value.

        Anything else is decided by the inherited check.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs is the module-level import; no function-local re-import is needed
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string when there is one; a bare id yields only `list`
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5527

5528

5529

class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a `list` parameter."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (v + list + feature) and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5576

5577

5578

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map /embed/live_stream?channel=<id> URLs to the channel's /live page."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel id taken from the query string."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5591

5592

5593

class YoutubeYtUserIE(InfoExtractor):
    """Expand the "ytuser:<name>" shorthand into the user's /videos URL."""

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's videos page."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5607

5608

5609

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ":ytfav" family of keywords to the `LL` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no further information."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

5626

5627

5628

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Build a playlist from the entries of the notification menu (":ytnotif" keyword)."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield an entry for each notification in `response`.

        `continuation_list` is a one-element list used as an out-parameter:
        slot 0 is cleared first, then set to the last continuationItemRenderer
        seen among the items (if any).
        """
        # Two alternative paths to the item list in the response
        popup_items = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer',
            'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
        appended_items = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        notifications = traverse_obj(response, popup_items, appended_items, expected_type=list) or []

        continuation_list[0] = None
        for item in notifications:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Turn one notification renderer into a `url` entry.

        Returns None when the notification has no video id and no usable
        channel id / post id pair.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            base_url = traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(r'/post/(.+)', base_url, 'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The date is only filled in when the `approximate_date` extractor argument is set
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            sent_time = self._extract_time_text(notification, 'sentTimeText')[0]
            upload_date = strftime_or_none(sent_time, '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries page by page until a page leaves no continuation behind."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            # Visitor data is taken from the previous response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return a playlist named 'notifications' backed by the lazy entry generator."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)

5717

5718

5719

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the "ytsearch" key; only class-level configuration is defined here."""

    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the "ytsearchdate" key; only class-level configuration is defined here."""

    IE_DESC = 'YouTube search, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle /results and /search URLs, passing the query and the `sp` parameter on."""

    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            'entries': [{
                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                'title': '#cats',
            }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist whose id and title are the query text."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q` when both are present
        terms = qs.get('search_query') or qs.get('q')
        query = terms[0]
        search_params = qs.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, search_params), query, query)

5787

5788

5789

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs; a section may be chosen via `sp` or the URL fragment."""

    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name -> `sp` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the search results as a playlist titled "<query>" or "<query> - <section>"."""
        qs = parse_qs(url)
        terms = qs.get('search_query') or qs.get('q')
        query = terms[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Name the section for a known `sp` value; otherwise the raw value is used as the name
            section = params
            for name, value in self._SECTIONS.items():
                if value == params:
                    section = name
                    break
        else:
            # No `sp`: treat the text after the first '#' as a section name
            fragment = (url.split('#') + [''])[1]
            section = compat_urllib_parse_unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)

5840

5841

5842

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    Each subclass has to override the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        # This class does not inherit from YoutubeBaseInfoExtractor, so its check is called unbound
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(self):
        # Derived from _FEED_NAME so that subclasses only need to set the feed name
        feed = self._FEED_NAME
        return f'youtube:{feed}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/<_FEED_NAME> page."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5860

5861

5862

class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ":ytwatchlater" keyword to the `WL` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no further information."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5874

5875

5876

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'recommended'; matches the bare youtube.com home URL and ":ytrec"."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base-class default of True
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'subscriptions'; matches the ":ytsubs" family of keywords."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Extraction itself is inherited from YoutubeFeedsInfoExtractor; this
    # class only supplies the feed name, the matching pattern and metadata.
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    # Accepts both the short :ythis and the long :ythistory keyword.
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{'url': ':ythistory', 'only_matching': True}]

class YoutubeStoriesIE(InfoExtractor):
    """Turn a 'ytstories:UC...' pseudo-URL into an 'RLTD'-prefixed playlist URL."""
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    # The captured id is the part after the leading "UC".
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', 'only_matching': True}]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the playlist URL derived from the matched id."""
        matched_id = self._match_id(url)
        playlist_id = f'RLTD{matched_id}'
        stories_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        return self.url_result(stories_url, ie=YoutubeTabIE, video_id=playlist_id)

5930

5931

5932

class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catch-all for YouTube watch/attribution_link URLs that carry no video ID.

    The pattern only matches when the URL ends right after "watch?" (with
    at most one of a few known non-ID parameters) or after an
    attribution_link "a=" parameter. Extraction always fails with a hint
    that the URL was probably cut off by the shell at an unquoted "&".
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """
        Always raise an expected ExtractorError explaining how to quote the URL.

        The example commands name yt-dlp (this project's executable) rather
        than youtube-dl, so the hint can be copied as-is.
        """
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """
    Extractor for youtube.com/clip/<id> pages.

    Resolves the clip to its underlying watch URL and returns a
    url_transparent result carrying the clip's start/end offsets, so the
    actual extraction is performed by YoutubeIE.
    """
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
        }
    }]

    def _real_extract(self, url):
        """
        Build a url_transparent result for the clip at `url`.

        Raises ExtractorError when the page data contains no video ID or no
        usable clip start/end times.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First "loopCommand" found along this path; holds the clip boundaries in ms.
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # clip_data is None when the lookup above matches nothing; fail with a
        # proper extractor error instead of a TypeError on subscripting None.
        start_ms = int_or_none(traverse_obj(clip_data, 'startTimeMs'))
        end_ms = int_or_none(traverse_obj(clip_data, 'endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start/end time')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose v= value is only 1-10 characters long and report them."""
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True}]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID and the URL."""
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)