jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import hashlib
	6	import itertools
	7	import json
	8	import math
	9	import os.path
	10	import random
	11	import re
	12	import sys
	13	import threading
	14	import time
	15	import traceback
	16
	17	from .common import InfoExtractor, SearchInfoExtractor
	18	from ..compat import functools
	19	from ..compat import (
	20	compat_chr,
	21	compat_HTTPError,
	22	compat_parse_qs,
	23	compat_str,
	24	compat_urllib_parse_unquote_plus,
	25	compat_urllib_parse_urlencode,
	26	compat_urllib_parse_urlparse,
	27	compat_urlparse,
	28	)
	29	from ..jsinterp import JSInterpreter
	30	from ..utils import (
	31	NO_DEFAULT,
	32	ExtractorError,
	33	bug_reports_message,
	34	classproperty,
	35	clean_html,
	36	datetime_from_str,
	37	dict_get,
	38	error_to_compat_str,
	39	float_or_none,
	40	format_field,
	41	get_first,
	42	int_or_none,
	43	is_html,
	44	join_nonempty,
	45	js_to_json,
	46	mimetype2ext,
	47	network_exceptions,
	48	orderedSet,
	49	parse_codecs,
	50	parse_count,
	51	parse_duration,
	52	parse_iso8601,
	53	parse_qs,
	54	qualities,
	55	remove_end,
	56	remove_start,
	57	smuggle_url,
	58	str_or_none,
	59	str_to_int,
	60	strftime_or_none,
	61	traverse_obj,
	62	try_get,
	63	unescapeHTML,
	64	unified_strdate,
	65	unified_timestamp,
	66	unsmuggle_url,
	67	update_url_query,
	68	url_or_none,
	69	urljoin,
	70	variadic,
	71	)
	72
	73	# any clients starting with _ cannot be explicitly requested by the user
	74	INNERTUBE_CLIENTS = {
	75	'web': {
	76	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	77	'INNERTUBE_CONTEXT': {
	78	'client': {
	79	'clientName': 'WEB',
	80	'clientVersion': '2.20211221.00.00',
	81	}
	82	},
	83	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	84	},
	85	'web_embedded': {
	86	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	87	'INNERTUBE_CONTEXT': {
	88	'client': {
	89	'clientName': 'WEB_EMBEDDED_PLAYER',
	90	'clientVersion': '1.20211215.00.01',
	91	},
	92	},
	93	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	94	},
	95	'web_music': {
	96	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	97	'INNERTUBE_HOST': 'music.youtube.com',
	98	'INNERTUBE_CONTEXT': {
	99	'client': {
	100	'clientName': 'WEB_REMIX',
	101	'clientVersion': '1.20211213.00.00',
	102	}
	103	},
	104	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	105	},
	106	'web_creator': {
	107	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	108	'INNERTUBE_CONTEXT': {
	109	'client': {
	110	'clientName': 'WEB_CREATOR',
	111	'clientVersion': '1.20211220.02.00',
	112	}
	113	},
	114	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	115	},
	116	'android': {
	117	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	118	'INNERTUBE_CONTEXT': {
	119	'client': {
	120	'clientName': 'ANDROID',
	121	'clientVersion': '16.49',
	122	}
	123	},
	124	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	125	'REQUIRE_JS_PLAYER': False
	126	},
	127	'android_embedded': {
	128	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	129	'INNERTUBE_CONTEXT': {
	130	'client': {
	131	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	132	'clientVersion': '16.49',
	133	},
	134	},
	135	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	136	'REQUIRE_JS_PLAYER': False
	137	},
	138	'android_music': {
	139	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	140	'INNERTUBE_CONTEXT': {
	141	'client': {
	142	'clientName': 'ANDROID_MUSIC',
	143	'clientVersion': '4.57',
	144	}
	145	},
	146	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	147	'REQUIRE_JS_PLAYER': False
	148	},
	149	'android_creator': {
	150	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	151	'INNERTUBE_CONTEXT': {
	152	'client': {
	153	'clientName': 'ANDROID_CREATOR',
	154	'clientVersion': '21.47',
	155	},
	156	},
	157	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	158	'REQUIRE_JS_PLAYER': False
	159	},
	160	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	161	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	162	'ios': {
	163	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	164	'INNERTUBE_CONTEXT': {
	165	'client': {
	166	'clientName': 'IOS',
	167	'clientVersion': '16.46',
	168	'deviceModel': 'iPhone14,3',
	169	}
	170	},
	171	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	172	'REQUIRE_JS_PLAYER': False
	173	},
	174	'ios_embedded': {
	175	'INNERTUBE_CONTEXT': {
	176	'client': {
	177	'clientName': 'IOS_MESSAGES_EXTENSION',
	178	'clientVersion': '16.46',
	179	'deviceModel': 'iPhone14,3',
	180	},
	181	},
	182	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	183	'REQUIRE_JS_PLAYER': False
	184	},
	185	'ios_music': {
	186	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	187	'INNERTUBE_CONTEXT': {
	188	'client': {
	189	'clientName': 'IOS_MUSIC',
	190	'clientVersion': '4.57',
	191	},
	192	},
	193	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	194	'REQUIRE_JS_PLAYER': False
	195	},
	196	'ios_creator': {
	197	'INNERTUBE_CONTEXT': {
	198	'client': {
	199	'clientName': 'IOS_CREATOR',
	200	'clientVersion': '21.47',
	201	},
	202	},
	203	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	204	'REQUIRE_JS_PLAYER': False
	205	},
	206	# mweb has 'ultralow' formats
	207	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	208	'mweb': {
	209	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	210	'INNERTUBE_CONTEXT': {
	211	'client': {
	212	'clientName': 'MWEB',
	213	'clientVersion': '2.20211221.01.00',
	214	}
	215	},
	216	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	217	},
	218	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	219	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	220	'tv_embedded': {
	221	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	222	'INNERTUBE_CONTEXT': {
	223	'client': {
	224	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	225	'clientVersion': '2.0',
	226	},
	227	},
	228	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	229	},
	230	}
	231
	232
	233	def _split_innertube_client(client_name):
	234	variant, *base = client_name.rsplit('.', 1)
	235	if base:
	236	return variant, base[0], variant
	237	base, *variant = client_name.split('_', 1)
	238	return client_name, base, variant[0] if variant else None
	239
	240
	241	def build_innertube_clients():
	242	THIRD_PARTY = {
	243	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	244	}
	245	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	246	priority = qualities(BASE_CLIENTS[::-1])
	247
	248	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	249	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	250	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	251	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	252	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	253
	254	_, base_client, variant = _split_innertube_client(client)
	255	ytcfg['priority'] = 10 * priority(base_client)
	256
	257	if not variant:
	258	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	259	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	260	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	261	embedscreen['priority'] -= 3
	262	elif variant == 'embedded':
	263	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	264	ytcfg['priority'] -= 2
	265	else:
	266	ytcfg['priority'] -= 3
	267
	268
	269	build_innertube_clients()
	270
	271
	272	class YoutubeBaseInfoExtractor(InfoExtractor):
	273	"""Provide base functions for Youtube extractors"""
	274
	275	_RESERVED_NAMES = (
	276	r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
	277	r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
	278	r'browse|oembed|get_video_info|iframe_api|s/player|'
	279	r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
	280
	281	_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
	282
	283	# _NETRC_MACHINE = 'youtube'
	284
	285	# If True it will raise an error if no login info is provided
	286	_LOGIN_REQUIRED = False
	287
	288	_INVIDIOUS_SITES = (
	289	# invidious-redirect websites
	290	r'(?:www\.)?redirect\.invidious\.io',
	291	r'(?:(?:www|dev)\.)?invidio\.us',
	292	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	293	r'(?:www\.)?invidious\.pussthecat\.org',
	294	r'(?:www\.)?invidious\.zee\.li',
	295	r'(?:www\.)?invidious\.ethibox\.fr',
	296	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	297	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	298	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	299	# youtube-dl invidious instances list
	300	r'(?:(?:www|no)\.)?invidiou\.sh',
	301	r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
	302	r'(?:www\.)?invidious\.kabi\.tk',
	303	r'(?:www\.)?invidious\.mastodon\.host',
	304	r'(?:www\.)?invidious\.zapashcanon\.fr',
	305	r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
	306	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	307	r'(?:www\.)?invidious\.himiko\.cloud',
	308	r'(?:www\.)?invidious\.reallyancient\.tech',
	309	r'(?:www\.)?invidious\.tube',
	310	r'(?:www\.)?invidiou\.site',
	311	r'(?:www\.)?invidious\.site',
	312	r'(?:www\.)?invidious\.xyz',
	313	r'(?:www\.)?invidious\.nixnet\.xyz',
	314	r'(?:www\.)?invidious\.048596\.xyz',
	315	r'(?:www\.)?invidious\.drycat\.fr',
	316	r'(?:www\.)?inv\.skyn3t\.in',
	317	r'(?:www\.)?tube\.poal\.co',
	318	r'(?:www\.)?tube\.connect\.cafe',
	319	r'(?:www\.)?vid\.wxzm\.sx',
	320	r'(?:www\.)?vid\.mint\.lgbt',
	321	r'(?:www\.)?vid\.puffyan\.us',
	322	r'(?:www\.)?yewtu\.be',
	323	r'(?:www\.)?yt\.elukerio\.org',
	324	r'(?:www\.)?yt\.lelux\.fi',
	325	r'(?:www\.)?invidious\.ggc-project\.de',
	326	r'(?:www\.)?yt\.maisputain\.ovh',
	327	r'(?:www\.)?ytprivate\.com',
	328	r'(?:www\.)?invidious\.13ad\.de',
	329	r'(?:www\.)?invidious\.toot\.koeln',
	330	r'(?:www\.)?invidious\.fdn\.fr',
	331	r'(?:www\.)?watch\.nettohikari\.com',
	332	r'(?:www\.)?invidious\.namazso\.eu',
	333	r'(?:www\.)?invidious\.silkky\.cloud',
	334	r'(?:www\.)?invidious\.exonip\.de',
	335	r'(?:www\.)?invidious\.riverside\.rocks',
	336	r'(?:www\.)?invidious\.blamefran\.net',
	337	r'(?:www\.)?invidious\.moomoo\.de',
	338	r'(?:www\.)?ytb\.trom\.tf',
	339	r'(?:www\.)?yt\.cyberhost\.uk',
	340	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	341	r'(?:www\.)?qklhadlycap4cnod\.onion',
	342	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	343	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	344	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	345	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	346	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	347	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	348	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	349	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	350	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	351	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	352	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	353	r'(?:www\.)?piped\.kavin\.rocks',
	354	r'(?:www\.)?piped\.silkky\.cloud',
	355	r'(?:www\.)?piped\.tokhmi\.xyz',
	356	r'(?:www\.)?piped\.moomoo\.me',
	357	r'(?:www\.)?il\.ax',
	358	r'(?:www\.)?piped\.syncpundit\.com',
	359	r'(?:www\.)?piped\.mha\.fi',
	360	r'(?:www\.)?piped\.mint\.lgbt',
	361	r'(?:www\.)?piped\.privacy\.com\.de',
	362	)
	363
	364	def _initialize_consent(self):
	365	cookies = self._get_cookies('https://www.youtube.com/')
	366	if cookies.get('__Secure-3PSID'):
	367	return
	368	consent_id = None
	369	consent = cookies.get('CONSENT')
	370	if consent:
	371	if 'YES' in consent.value:
	372	return
	373	consent_id = self._search_regex(
	374	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	375	if not consent_id:
	376	consent_id = random.randint(100, 999)
	377	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	378
	379	def _initialize_pref(self):
	380	cookies = self._get_cookies('https://www.youtube.com/')
	381	pref_cookie = cookies.get('PREF')
	382	pref = {}
	383	if pref_cookie:
	384	try:
	385	pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
	386	except ValueError:
	387	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	388	pref.update({'hl': 'en', 'tz': 'UTC'})
	389	self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
	390
	391	def _real_initialize(self):
	392	self._initialize_pref()
	393	self._initialize_consent()
	394	self._check_login_required()
	395
	396	def _check_login_required(self):
	397	if self._LOGIN_REQUIRED and not self._cookies_passed:
	398	self.raise_login_required('Login details are needed to download this content', method='cookies')
	399
	400	_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
	401	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
	402	_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
	403
	404	def _get_default_ytcfg(self, client='web'):
	405	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	406
	407	def _get_innertube_host(self, client='web'):
	408	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	409
	410	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	411	# try_get but with fallback to default ytcfg client values when present
	412	_func = lambda y: try_get(y, getter, expected_type)
	413	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	414
	415	def _extract_client_name(self, ytcfg, default_client='web'):
	416	return self._ytcfg_get_safe(
	417	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	418	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
	419
	420	def _extract_client_version(self, ytcfg, default_client='web'):
	421	return self._ytcfg_get_safe(
	422	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	423	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
	424
	425	def _extract_api_key(self, ytcfg=None, default_client='web'):
	426	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
	427
	428	def _extract_context(self, ytcfg=None, default_client='web'):
	429	context = get_first(
	430	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	431	# Enforce language and tz for extraction
	432	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	433	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	434	return context
	435
	436	_SAPISID = None
	437
	438	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	439	time_now = round(time.time())
	440	if self._SAPISID is None:
	441	yt_cookies = self._get_cookies('https://www.youtube.com')
	442	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	443	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	444	sapisid_cookie = dict_get(
	445	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	446	if sapisid_cookie and sapisid_cookie.value:
	447	self._SAPISID = sapisid_cookie.value
	448	self.write_debug('Extracted SAPISID cookie')
	449	# SAPISID cookie is required if not already present
	450	if not yt_cookies.get('SAPISID'):
	451	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	452	self._set_cookie(
	453	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	454	else:
	455	self._SAPISID = False
	456	if not self._SAPISID:
	457	return None
	458	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	459	sapisidhash = hashlib.sha1(
	460	f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
	461	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	462
	463	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	464	note='Downloading API JSON', errnote='Unable to download API page',
	465	context=None, api_key=None, api_hostname=None, default_client='web'):
	466
	467	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	468	data.update(query)
	469	real_headers = self.generate_api_headers(default_client=default_client)
	470	real_headers.update({'content-type': 'application/json'})
	471	if headers:
	472	real_headers.update(headers)
	473	return self._download_json(
	474	f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',
	475	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	476	data=json.dumps(data).encode('utf8'), headers=real_headers,
	477	query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
	478
	479	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	480	data = self._search_regex(
	481	(fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}',
	482	self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
	483	if data:
	484	return self._parse_json(data, item_id, fatal=fatal)
	485
	486	@staticmethod
	487	def _extract_session_index(*data):
	488	"""
	489	Index of current account in account list.
	490	See: https://github.com/yt-dlp/yt-dlp/pull/519
	491	"""
	492	for ytcfg in data:
	493	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	494	if session_index is not None:
	495	return session_index
	496
	497	# Deprecated?
	498	def _extract_identity_token(self, ytcfg=None, webpage=None):
	499	if ytcfg:
	500	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)

1

import base64

import calendar

import copy

import datetime

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

from .common import InfoExtractor, SearchInfoExtractor

18

from ..compat import functools

19

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

25

compat_urllib_parse_urlencode,

26

compat_urllib_parse_urlparse,

27

compat_urlparse,

28

)

29

from ..jsinterp import JSInterpreter

30

from ..utils import (

NO_DEFAULT,

ExtractorError,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

74

INNERTUBE_CLIENTS = {

75

'web': {

76

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

77

'INNERTUBE_CONTEXT': {

78

'client': {

79

'clientName': 'WEB',

80

'clientVersion': '2.20211221.00.00',

81

}

82

},

83

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

84

},

85

'web_embedded': {

86

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

87

'INNERTUBE_CONTEXT': {

88

'client': {

89

'clientName': 'WEB_EMBEDDED_PLAYER',

90

'clientVersion': '1.20211215.00.01',

91

},

92

},

93

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

94

},

95

'web_music': {

96

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

97

'INNERTUBE_HOST': 'music.youtube.com',

98

'INNERTUBE_CONTEXT': {

99

'client': {

100

'clientName': 'WEB_REMIX',

101

'clientVersion': '1.20211213.00.00',

102

}

103

},

104

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

105

},

106

'web_creator': {

107

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

108

'INNERTUBE_CONTEXT': {

109

'client': {

110

'clientName': 'WEB_CREATOR',

111

'clientVersion': '1.20211220.02.00',

112

}

113

},

114

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

115

},

116

'android': {

117

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

118

'INNERTUBE_CONTEXT': {

119

'client': {

120

'clientName': 'ANDROID',

121

'clientVersion': '16.49',

122

}

123

},

124

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

125

'REQUIRE_JS_PLAYER': False

126

},

127

'android_embedded': {

128

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

129

'INNERTUBE_CONTEXT': {

130

'client': {

131

'clientName': 'ANDROID_EMBEDDED_PLAYER',

132

'clientVersion': '16.49',

133

},

134

},

135

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

136

'REQUIRE_JS_PLAYER': False

137

},

138

'android_music': {

139

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

140

'INNERTUBE_CONTEXT': {

141

'client': {

142

'clientName': 'ANDROID_MUSIC',

143

'clientVersion': '4.57',

144

}

145

},

146

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

147

'REQUIRE_JS_PLAYER': False

148

},

149

'android_creator': {

150

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

151

'INNERTUBE_CONTEXT': {

152

'client': {

153

'clientName': 'ANDROID_CREATOR',

154

'clientVersion': '21.47',

155

},

156

},

157

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

158

'REQUIRE_JS_PLAYER': False

159

},

160

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

161

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

162

'ios': {

163

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

164

'INNERTUBE_CONTEXT': {

165

'client': {

166

'clientName': 'IOS',

167

'clientVersion': '16.46',

168

'deviceModel': 'iPhone14,3',

169

}

170

},

171

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

172

'REQUIRE_JS_PLAYER': False

173

},

174

'ios_embedded': {

175

'INNERTUBE_CONTEXT': {

176

'client': {

177

'clientName': 'IOS_MESSAGES_EXTENSION',

178

'clientVersion': '16.46',

179

'deviceModel': 'iPhone14,3',

180

},

181

},

182

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

183

'REQUIRE_JS_PLAYER': False

184

},

185

'ios_music': {

186

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

187

'INNERTUBE_CONTEXT': {

188

'client': {

189

'clientName': 'IOS_MUSIC',

190

'clientVersion': '4.57',

191

},

192

},

193

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

194

'REQUIRE_JS_PLAYER': False

195

},

196

'ios_creator': {

197

'INNERTUBE_CONTEXT': {

198

'client': {

199

'clientName': 'IOS_CREATOR',

200

'clientVersion': '21.47',

201

},

202

},

203

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

204

'REQUIRE_JS_PLAYER': False

205

},

206

# mweb has 'ultralow' formats

207

# See: https://github.com/yt-dlp/yt-dlp/pull/557

208

'mweb': {

209

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

210

'INNERTUBE_CONTEXT': {

211

'client': {

212

'clientName': 'MWEB',

213

'clientVersion': '2.20211221.01.00',

214

}

215

},

216

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

217

},

218

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

219

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

220

'tv_embedded': {

221

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

222

'INNERTUBE_CONTEXT': {

223

'client': {

224

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

225

'clientVersion': '2.0',

226

},

227

},

228

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):
    """Split a client name into a ``(name, base, variant)`` triple.

    A dotted name ``'<variant>.<base>'`` is split on its last dot and gives
    ``(variant, base, variant)``.  Any other name is split on its first
    underscore and gives ``(client_name, base, variant)``, with ``variant``
    set to ``None`` when the name has no underscore.
    """
    head, dot, tail = client_name.rpartition('.')
    if dot:
        return head, tail, head
    base, underscore, variant = client_name.partition('_')
    return client_name, base, (variant if underscore else None)

239

240

241

def build_innertube_clients():
    """Complete every INNERTUBE_CLIENTS entry in place.

    Each entry receives default ``INNERTUBE_API_KEY``, ``INNERTUBE_HOST``,
    ``REQUIRE_JS_PLAYER`` and client ``hl`` values where they are unset, plus
    a ``priority`` computed from its base client.  For every client name
    without a variant suffix, a deep-copied ``'<client>_embedscreen'`` entry
    with ``clientScreen`` set to ``'EMBED'`` is registered as well.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: new *_embedscreen entries are added in the loop
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, fallback in defaults:
            cfg.setdefault(key, fallback)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        cfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            # Plain base client: keep it as is and add an embed-screen copy
            screen_cfg = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg
            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_cfg['priority'] -= 3

267

268

269

build_innertube_clients()

270

271

272

class YoutubeBaseInfoExtractor(InfoExtractor):

273

"""Provide base functions for Youtube extractors"""

274

275

_RESERVED_NAMES = (

276

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

282

283

# _NETRC_MACHINE = 'youtube'

284

285

# If True it will raise an error if no login info is provided

286

_LOGIN_REQUIRED = False

287

288

_INVIDIOUS_SITES = (

289

# invidious-redirect websites

290

r'(?:www\.)?redirect\.invidious\.io',

291

r'(?:(?:www|dev)\.)?invidio\.us',

292

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

293

r'(?:www\.)?invidious\.pussthecat\.org',

294

r'(?:www\.)?invidious\.zee\.li',

295

r'(?:www\.)?invidious\.ethibox\.fr',

296

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

297

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

298

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

299

# youtube-dl invidious instances list

300

r'(?:(?:www|no)\.)?invidiou\.sh',

301

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

302

r'(?:www\.)?invidious\.kabi\.tk',

303

r'(?:www\.)?invidious\.mastodon\.host',

304

r'(?:www\.)?invidious\.zapashcanon\.fr',

305

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

306

r'(?:www\.)?invidious\.tinfoil-hat\.net',

307

r'(?:www\.)?invidious\.himiko\.cloud',

308

r'(?:www\.)?invidious\.reallyancient\.tech',

309

r'(?:www\.)?invidious\.tube',

310

r'(?:www\.)?invidiou\.site',

311

r'(?:www\.)?invidious\.site',

312

r'(?:www\.)?invidious\.xyz',

313

r'(?:www\.)?invidious\.nixnet\.xyz',

314

r'(?:www\.)?invidious\.048596\.xyz',

315

r'(?:www\.)?invidious\.drycat\.fr',

316

r'(?:www\.)?inv\.skyn3t\.in',

317

r'(?:www\.)?tube\.poal\.co',

318

r'(?:www\.)?tube\.connect\.cafe',

319

r'(?:www\.)?vid\.wxzm\.sx',

320

r'(?:www\.)?vid\.mint\.lgbt',

321

r'(?:www\.)?vid\.puffyan\.us',

322

r'(?:www\.)?yewtu\.be',

323

r'(?:www\.)?yt\.elukerio\.org',

324

r'(?:www\.)?yt\.lelux\.fi',

325

r'(?:www\.)?invidious\.ggc-project\.de',

326

r'(?:www\.)?yt\.maisputain\.ovh',

327

r'(?:www\.)?ytprivate\.com',

328

r'(?:www\.)?invidious\.13ad\.de',

329

r'(?:www\.)?invidious\.toot\.koeln',

330

r'(?:www\.)?invidious\.fdn\.fr',

331

r'(?:www\.)?watch\.nettohikari\.com',

332

r'(?:www\.)?invidious\.namazso\.eu',

333

r'(?:www\.)?invidious\.silkky\.cloud',

334

r'(?:www\.)?invidious\.exonip\.de',

335

r'(?:www\.)?invidious\.riverside\.rocks',

336

r'(?:www\.)?invidious\.blamefran\.net',

337

r'(?:www\.)?invidious\.moomoo\.de',

338

r'(?:www\.)?ytb\.trom\.tf',

339

r'(?:www\.)?yt\.cyberhost\.uk',

340

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

341

r'(?:www\.)?qklhadlycap4cnod\.onion',

342

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

343

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

344

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

345

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

346

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

347

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

348

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

349

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

350

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

351

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

352

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

353

r'(?:www\.)?piped\.kavin\.rocks',

354

r'(?:www\.)?piped\.silkky\.cloud',

355

r'(?:www\.)?piped\.tokhmi\.xyz',

356

r'(?:www\.)?piped\.moomoo\.me',

357

r'(?:www\.)?il\.ax',

358

r'(?:www\.)?piped\.syncpundit\.com',

359

r'(?:www\.)?piped\.mha\.fi',

360

r'(?:www\.)?piped\.mint\.lgbt',

361

r'(?:www\.)?piped\.privacy\.com\.de',

362

)

363

364

def _initialize_consent(self):
    """Set a ``CONSENT=YES+...`` cookie on .youtube.com when one is needed.

    Nothing is changed if a ``__Secure-3PSID`` cookie is present or the
    existing ``CONSENT`` cookie already contains ``YES``.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return
    current = jar.get('CONSENT')
    pending_id = None
    if current:
        if 'YES' in current.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', current.value, 'consent', default=None)
    # Reuse the id from a PENDING consent cookie if found, else pick a random one
    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

378

379

def _initialize_pref(self):
    """Rewrite the ``PREF`` cookie so that ``hl=en`` and ``tz=UTC`` are set.

    Other values from an existing ``PREF`` cookie are kept when it can be
    parsed; a warning is reported when parsing raises ``ValueError``.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    existing = jar.get('PREF')
    settings = {}
    if existing:
        try:
            settings = dict(compat_urlparse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings.update(hl='en', tz='UTC')
    self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(settings))

390

391

def _real_initialize(self):
    """Run the initialization steps: PREF cookie, consent cookie, login check."""
    for step in (self._initialize_pref, self._initialize_consent, self._check_login_required):
        step()

395

396

def _check_login_required(self):
    """Raise a login-required error if ``_LOGIN_REQUIRED`` is set and no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')

399

400

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'

401

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'

402

_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

403

404

def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for ``client``."""
    template = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(template)

406

407

def _get_innertube_host(self, client='web'):
    """Return the ``INNERTUBE_HOST`` value configured for ``client``."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']

409

410

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """Look up a value with ``try_get``, falling back to the default client config.

    The default ytcfg of ``default_client`` is consulted whenever the lookup
    on ``ytcfg`` yields a falsy result.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

414

415

def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from ``ytcfg``, or from the default client config.

    ``INNERTUBE_CLIENT_NAME`` is tried before the ``clientName`` stored in
    the INNERTUBE_CONTEXT client section.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)

419

420

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from ``ytcfg``, falling back to the default client config.

    Looks at ``INNERTUBE_CLIENT_VERSION`` first, then at
    ``INNERTUBE_CONTEXT.client.clientVersion``.
    """
    return self._ytcfg_get_safe(
        ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
                lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)

424

425

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return ``INNERTUBE_API_KEY`` from ``ytcfg``, falling back to the default client config."""
    return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

427

428

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the ``INNERTUBE_CONTEXT`` dict with language and timezone forced.

    The context is taken from ``ytcfg`` when available, otherwise from the
    default config of ``default_client``.
    """
    context = get_first(
        (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    # NOTE(review): the 'client' dict is updated in place, so a context taken
    # from the caller's ytcfg appears to be mutated as well - confirm intended
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
    return context

# Cached SAPISID value: None until _generate_sapisidhash_header first looks it up,
# after which it holds the cookie value, or False when no such cookie was found
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

439

time_now = round(time.time())

440

if self._SAPISID is None:

441

yt_cookies = self._get_cookies('https://www.youtube.com')

442

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

443

# See: https://github.com/yt-dlp/yt-dlp/issues/393

444

sapisid_cookie = dict_get(

445

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

446

if sapisid_cookie and sapisid_cookie.value:

447

self._SAPISID = sapisid_cookie.value

448

self.write_debug('Extracted SAPISID cookie')

449

# SAPISID cookie is required if not already present

450

if not yt_cookies.get('SAPISID'):

451

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

452

self._set_cookie(

453

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

454

else:

455

self._SAPISID = False

456

if not self._SAPISID:

457

return None

458

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

459

sapisidhash = hashlib.sha1(

460

f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()

461

return f'SAPISIDHASH {time_now}_{sapisidhash}'

462

463

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST ``query`` as JSON to the innertube endpoint ``ep`` and return the parsed response.

    @param ep            endpoint name appended to ``/youtubei/v1/``
    @param query         dict merged into the request body next to 'context'
    @param headers       extra headers; override the generated API headers
    @param context       innertube context; extracted for ``default_client`` when falsy
    @param api_key       API key; extracted from the default config when falsy
    @param api_hostname  host to call; the innertube host of ``default_client`` when falsy
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)
    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)
    # NOTE(review): the fallback API key is looked up without default_client,
    # unlike the context and host above - kept as is, confirm whether intended
    return self._download_json(
        f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})

478

479

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find and parse the ``ytInitialData`` JSON embedded in ``webpage``.

    The stricter pattern (with the trailing boundary) is tried first, then
    the bare assignment pattern. Returns None when nothing was found.
    """
    data = self._search_regex(
        (fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}',
         self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
    if data:
        return self._parse_json(data, item_id, fatal=fatal)

485

486

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    @params one or more ytcfg dicts; the first usable SESSION_INDEX wins
    @returns the index as int, or None if none of the arguments has one
    """
    for ytcfg in data:
        session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        if session_index is not None:
            return session_index

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from ``ytcfg`` (``ID_TOKEN``) or, failing that, from ``webpage``.

    Returns None when neither source yields a token.
    """
    if ytcfg:
        token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
        if token:
            return token
    if webpage:
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None, fatal=False)

507

508

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the channel sync id, or None when not on a secondary channel
    """
    for data in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    return get_first(
        args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
        expected_type=str)

536

537

@functools.cached_property
def is_authenticated(self):
    """Whether a SAPISIDHASH header can be generated; computed once and cached."""
    return bool(self._generate_sapisidhash_header())

540

541

def extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to ``ytcfg.set(...)`` in ``webpage``.

    @returns the parsed dict, or {} when the page is empty, the call is
             not found, or the JSON cannot be parsed
    """
    if not webpage:
        return {}
    # The parentheses of the ytcfg.set(...) call must be escaped so they
    # match literally; the single capture group is the JSON object
    return self._parse_json(
        self._search_regex(
            r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
            default='{}'), video_id, fatal=False) or {}

548

549

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an innertube API request.

    Explicit keyword arguments take precedence over values extracted from
    ``ytcfg``. Headers whose value is None are left out of the result.
    ``Authorization`` and ``X-Origin`` are only added when a SAPISIDHASH
    can be generated.
    """
    origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
    headers = {
        'X-YouTube-Client-Name': compat_str(
            self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }
    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With an account sync id but no known index, fall back to account 0
        headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return {h: v for h, v in headers.items() if v is not None}

573

574

def _download_ytcfg(self, client, video_id):
    """Download the page for ``client`` and return the ytcfg found in it.

    Only 'web', 'web_music' and 'web_embedded' have a page to download;
    any other client, or a failed download, yields {}.
    """
    url = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }.get(client)
    if not url:
        return {}
    webpage = self._download_webpage(
        url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
    return self.extract_ytcfg(video_id, webpage) or {}

585

586

@staticmethod

587

def _build_api_continuation_query(continuation, ctp=None):

588

query = {

589

'continuation': continuation

590

}

591

# TODO: Inconsistency with clickTrackingParams.

592

# Currently we have a fixed ctp contained within context (from ytcfg)

593

# and a ctp in root query for continuation.

594

if ctp:

595

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from ``nextContinuationData`` / ``reloadContinuationData``.

    Returns None when the renderer carries no such data or no token.
    """
    next_continuation = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    if not next_continuation:
        return
    continuation = next_continuation.get('continuation')
    if not continuation:
        return
    ctp = next_continuation.get('clickTrackingParams')
    return cls._build_api_continuation_query(continuation, ctp)

610

611

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when ``continuation_ep`` is not a dict or has no
    ``continuationCommand.token``.
    """
    if isinstance(continuation_ep, dict):
        continuation = try_get(
            continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
        if not continuation:
            return
        ctp = continuation_ep.get('clickTrackingParams')
        return cls._build_api_continuation_query(continuation, ctp)

620

621

@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for ``renderer``, or None if there is none.

    The renderer-level continuation data is preferred; otherwise the
    entries under 'contents' and 'items' are scanned for a
    ``continuationItemRenderer``.
    """
    next_continuation = cls._extract_next_continuation_data(renderer)
    if next_continuation:
        return next_continuation

    contents = []
    for key in ('contents', 'items'):
        contents.extend(try_get(renderer, lambda x: x[key], list) or [])

    for content in contents:
        if not isinstance(content, dict):
            continue
        continuation_ep = try_get(
            content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                      lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
            dict)
        continuation = cls._extract_continuation_ep_data(continuation_ep)
        if continuation:
            return continuation

@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` for every alert in ``data['alerts']``.

    Alerts without a type or without message text are skipped, as are
    entries that are not dicts.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Guard against non-dict values, which have no .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

654

655

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

656

errors = []

657

warnings = []

658

for alert_type, alert_message in alerts:

659

if alert_type.lower() == 'error' and fatal:

660

errors.append([alert_type, alert_message])

661

else:

662

warnings.append([alert_type, alert_message])

663

664

for alert_type, alert_message in (warnings + errors[:-1]):

665

self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

666

if errors:

667

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

668

669

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from ``data`` and report them; extra arguments go to ``_report_alerts``."""
    return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

671

672

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased ``metadataBadgeRenderer`` labels in ``renderer['badges']``."""
    badges = set()
    for badge in try_get(renderer, lambda x: x['badges'], list) or []:
        label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        if label:
            badges.add(label.lower())
    return badges

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of the given paths.

    Each candidate is read from its 'simpleText' if present, otherwise
    from the concatenated 'text' of its 'runs' (or of the candidate
    itself when it is a list).

    @param path_list  traverse_obj paths into ``data``; ``data`` itself when empty
    @param max_runs   maximum number of runs to join; all when falsy
    @returns          the text, or None when nothing was found
    """
    for path in path_list or [None]:
        if path is None:
            obj = [data]
        else:
            obj = traverse_obj(data, path, default=[])
            # Non-branching paths give a single object; wrap it for the loop below
            if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                obj = [obj]
        for item in obj:
            text = try_get(item, lambda x: x['simpleText'], compat_str)
            if text:
                return text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            runs = runs[:min(len(runs), max_runs or len(runs))]
            text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if text:
                return text

def _get_count(self, data, *path_list):
    """Parse a count from the text found at ``path_list``.

    ``parse_count`` is tried first; when it fails, the leading run of
    digits and commas (whitespace removed) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is None:
        count = str_to_int(
            self._search_regex(r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None))
    return count

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width'
    """
    thumbnails = []
    for path in path_list or [()]:
        for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
            # Skip malformed entries, which have no .get()
            if not isinstance(thumbnail, dict):
                continue
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
    return thumbnails

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    @returns the result of datetime_from_str, or None when the text does
             not contain a relative time or it cannot be converted
    """
    # Either a keyword ('start') or '<time> <unit>(s) ago'
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if mobj:
        start = mobj.group('start')
        if start:
            return datetime_from_str(start)
        try:
            return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
        except ValueError:
            return None

def _extract_time_text(self, renderer, *path_list):
    """Parse the time text found at ``path_list`` into a timestamp.

    A relative time ('6 days ago') is tried first, then an absolute
    date. A warning is reported once when a non-empty text cannot be parsed.

    @returns (timestamp, time_text)
    """
    text = self._get_text(renderer, *path_list) or ''
    dt = self.extract_relative_time(text)
    timestamp = None
    if isinstance(dt, datetime.datetime):
        timestamp = calendar.timegm(dt.timetuple())

    if timestamp is None:
        timestamp = (
            unified_timestamp(text) or unified_timestamp(
                self._search_regex(
                    (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                    text.lower(), 'time text', default=None)))

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text

768

769

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the innertube API with retries and return the response.

    The request is retried up to the 'extractor_retries' param (default 3)
    on network errors other than HTTP 403/429, on 'unknown error' alerts,
    and when none of ``check_get_keys`` is present in the response.

    @param check_get_keys  keys of which at least one must be in the response
    @param fatal           when False, failures are reported as warnings and
                           None is returned instead of raising
    @raises ExtractorError when ``fatal`` and the request ultimately fails
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                if isinstance(e.cause, compat_HTTPError):
                    first_bytes = e.cause.read(512)
                    if not is_html(first_bytes):
                        # A non-HTML error body may carry a JSON error message
                        yt_error = try_get(
                            self._parse_json(
                                self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                if 'unknown error' in e.msg.lower():
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response

@staticmethod
def is_music_url(url):
    """Return True if ``url`` starts with http(s)://music.youtube.com/."""
    return re.match(r'https?://music\.youtube\.com/', url) is not None

844

845

def _extract_video(self, renderer):
    """Build a ``url``-type result dict for a video renderer.

    The result points at the shorts URL when the renderer is marked as a
    short, otherwise at the regular watch URL. 'upload_date' is only filled
    in when the 'approximate_date' extractor argument of 'youtubetab' is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')
    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration mentioned in the title's accessibility label
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
            video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    url = f'https://www.youtube.com/watch?v={video_id}'
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
                        if self._configuration_arg('approximate_date', ie_key='youtubetab')
                        else None),
        'live_status': ('is_upcoming' if scheduled_timestamp is not None
                        else 'was_live' if 'streamed' in time_text.lower()
                        else 'is_live' if overlay_style == 'LIVE' or 'live now' in badges
                        else None),
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

902

IE_DESC = 'YouTube'

903

_VALID_URL = r"""(?x)^

904

(

905

(?:https?://|//) # http(s):// or protocol-independent URL

906

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

907

(?:www\.)?deturl\.com/www\.youtube\.com|

908

(?:www\.)?pwnyoutube\.com|

909

(?:www\.)?hooktube\.com|

910

(?:www\.)?yourepeat\.com|

911

tube\.majestyc\.net|

912

%(invidious)s|

913

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

914

(?:.*?\#/)? # handle anchor (#/) redirect urls

915

(?: # the various things that can precede the ID:

916

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

917

|(?: # or the v= param in all its forms

918

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

919

(?:\?|\#!?) # the params delimiter ? or # or #!

920

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

926

vid\.plus| # or vid.plus/xxxx

927

zwearz\.com/watch| # or zwearz.com/watch/xxxx

928

%(invidious)s

929

)/

930

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

931

)

932

)? # all until now is optional -> you can pass the naked ID

933

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

934

(?(1).+)? # if we found the ID, everything can follow

935

(?:\#|$)""" % {

936

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

937

}

938

_PLAYER_INFO_RE = (

939

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

940

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

941

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

942

)

943

_formats = {

944

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

945

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

946

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

947

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

948

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

949

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

950

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

951

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

952

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

953

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

954

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

955

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

956

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

957

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

958

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

959

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

960

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

961

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

966

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

967

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

968

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

969

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

970

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

971

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

972

973

# Apple HTTP Live Streaming

974

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

975

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

976

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

977

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

978

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

979

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

980

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

981

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

982

983

# DASH mp4 video

984

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

985

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

986

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

987

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

988

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

989

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

990

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

991

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

992

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

993

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

994

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

995

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

996

997

# Dash mp4 audio

998

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

999

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

1000

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

1001

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1002

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1003

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

1004

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

1005

1006

# Dash webm

1007

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1008

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1009

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1010

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1011

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1012

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1013

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1014

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1015

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1016

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1017

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1018

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1019

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1020

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1021

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1022

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1023

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1024

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1025

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1026

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1027

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1028

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1029

1030

# Dash webm audio

1031

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1032

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1033

1034

# Dash webm audio with opus inside

1035

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1036

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1037

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1038

1039

# RTMP (unnamed)

1040

'_rtmp': {'protocol': 'rtmp'},

1041

1042

# av01 video only formats sometimes served with "unknown" codecs

1043

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1044

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1045

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1046

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1047

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1048

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1049

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1050

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1051

}

1052

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1064

'uploader': 'Philipp Hagemeister',

1065

'uploader_id': 'phihag',

1066

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1067

'channel': 'Philipp Hagemeister',

1068

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1069

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1070

'upload_date': '20121002',

1071

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1072

'categories': ['Science & Technology'],

1073

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1078

'playable_in_embed': True,

1079

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1080

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1089

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1094

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1095

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1096

'uploader': 'SET India',

1097

'uploader_id': 'setindia',

1098

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1099

'age_limit': 18,

1100

},

1101

'skip': 'Private video',

1102

},

1103

{

1104

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1105

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1110

'uploader': 'Philipp Hagemeister',

1111

'uploader_id': 'phihag',

1112

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1113

'channel': 'Philipp Hagemeister',

1114

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1115

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1116

'upload_date': '20121002',

1117

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1118

'categories': ['Science & Technology'],

1119

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1124

'playable_in_embed': True,

1125

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1126

'live_status': 'not_live',

1127

'age_limit': 0,

1128

'channel_follower_count': int

1129

},

1130

'params': {

1131

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1136

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1141

'uploader_id': '8KVIDEO',

1142

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1143

'description': '',

1144

'uploader': '8KVIDEO',

1145

'title': 'UHDTV TEST 8K VIDEO.mp4'

1146

},

1147

'params': {

1148

'youtube_include_dash_manifest': True,

1149

'format': '141',

1150

},

1151

'skip': 'format 141 not served anymore',

1152

},

1153

# DASH manifest with encrypted signature

1154

{

1155

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1160

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1161

'duration': 244,

1162

'uploader': 'AfrojackVEVO',

1163

'uploader_id': 'AfrojackVEVO',

1164

'upload_date': '20131011',

1165

'abr': 129.495,

1166

'like_count': int,

1167

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1168

'playable_in_embed': True,

1169

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1170

'view_count': int,

1171

'track': 'The Spark',

1172

'live_status': 'not_live',

1173

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1174

'channel': 'Afrojack',

1175

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1176

'tags': 'count:19',

1177

'availability': 'public',

1178

'categories': ['Music'],

1179

'age_limit': 0,

1180

'alt_title': 'The Spark',

1181

'channel_follower_count': int

1182

},

1183

'params': {

1184

'youtube_include_dash_manifest': True,

1185

'format': '141/bestaudio[ext=m4a]',

1186

},

1187

},

1188

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1189

{

1190

'note': 'Embed allowed age-gate video',

1191

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1196

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1197

'duration': 142,

1198

'uploader': 'The Witcher',

1199

'uploader_id': 'WitcherGame',

1200

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1201

'upload_date': '20140605',

1202

'age_limit': 18,

1203

'categories': ['Gaming'],

1204

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1205

'availability': 'needs_auth',

1206

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1207

'like_count': int,

1208

'channel': 'The Witcher',

1209

'live_status': 'not_live',

1210

'tags': 'count:17',

1211

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1212

'playable_in_embed': True,

1213

'view_count': int,

1214

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1219

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1224

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1225

'upload_date': '20200408',

1226

'uploader_id': 'FlyingKitty900',

1227

'uploader': 'FlyingKitty',

1228

'age_limit': 18,

1229

'availability': 'needs_auth',

1230

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1231

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1232

'channel': 'FlyingKitty',

1233

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1234

'view_count': int,

1235

'categories': ['Entertainment'],

1236

'live_status': 'not_live',

1237

'tags': ['Flyingkitty', 'godzilla 2'],

1238

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1239

'like_count': int,

1240

'duration': 177,

1241

'playable_in_embed': True,

1242

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1247

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1248

'info_dict': {

1249

'id': 'Tq92D6wQ1mg',

1250

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1251

'ext': 'mp4',

1252

'upload_date': '20191228',

1253

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1254

'uploader': 'Projekt Melody',

1255

'description': 'md5:17eccca93a786d51bc67646756894066',

1256

'age_limit': 18,

1257

'like_count': int,

1258

'availability': 'needs_auth',

1259

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1260

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1261

'view_count': int,

1262

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1263

'channel': 'Projekt Melody',

1264

'live_status': 'not_live',

1265

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1266

'playable_in_embed': True,

1267

'categories': ['Entertainment'],

1268

'duration': 106,

1269

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1270

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1275

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1280

'uploader': 'Herr Lurik',

1281

'uploader_id': 'st3in234',

1282

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1283

'upload_date': '20130730',

1284

'track': 'Such mich find mich',

1285

'age_limit': 0,

1286

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1287

'like_count': int,

1288

'playable_in_embed': False,

1289

'creator': 'OOMPH!',

1290

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1291

'view_count': int,

1292

'alt_title': 'Such mich find mich',

1293

'duration': 210,

1294

'channel': 'Herr Lurik',

1295

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1296

'categories': ['Music'],

1297

'availability': 'public',

1298

'uploader_url': 'http://www.youtube.com/user/st3in234',

1299

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1300

'live_status': 'not_live',

1301

'artist': 'OOMPH!',

1302

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1307

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1308

'only_matching': True,

1309

},

1310

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1311

# YouTube Red ad is not captured for creator

1312

{

1313

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1319

'uploader_id': 'deadmau5',

1320

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1321

'creator': 'deadmau5',

1322

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1323

'uploader': 'deadmau5',

1324

'title': 'Deadmau5 - Some Chords (HD)',

1325

'alt_title': 'Some Chords',

1326

'availability': 'public',

1327

'tags': 'count:14',

1328

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1329

'view_count': int,

1330

'live_status': 'not_live',

1331

'channel': 'deadmau5',

1332

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1333

'like_count': int,

1334

'track': 'Some Chords',

1335

'artist': 'deadmau5',

1336

'playable_in_embed': True,

1337

'age_limit': 0,

1338

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1339

'categories': ['Music'],

1340

'album': 'Some Chords',

1341

'channel_follower_count': int

1342

},

1343

'expected_warnings': [

1344

'DASH manifest missing',

1345

]

1346

},

1347

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1348

{

1349

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1355

'uploader_id': 'olympic',

1356

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1357

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1358

'uploader': 'Olympics',

1359

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1360

'like_count': int,

1361

'release_timestamp': 1343767800,

1362

'playable_in_embed': True,

1363

'categories': ['Sports'],

1364

'release_date': '20120731',

1365

'channel': 'Olympics',

1366

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1367

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1368

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1369

'age_limit': 0,

1370

'availability': 'public',

1371

'live_status': 'was_live',

1372

'view_count': int,

1373

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1374

'channel_follower_count': int

1375

},

1376

'params': {

1377

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1387

'duration': 85,

1388

'upload_date': '20110310',

1389

'uploader_id': 'AllenMeow',

1390

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1391

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1392

'uploader': '孫ᄋᄅ',

1393

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1394

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1399

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1400

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1401

'view_count': int,

1402

'categories': ['People & Blogs'],

1403

'like_count': int,

1404

'live_status': 'not_live',

1405

'availability': 'unlisted',

1406

'channel_follower_count': int

1407

},

1408

},

1409

# url_encoded_fmt_stream_map is empty string

1410

{

1411

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1416

'description': '',

1417

'upload_date': '20150404',

1418

'uploader_id': 'spbelect',

1419

'uploader': 'Наблюдатели Петербурга',

1420

},

1421

'params': {

1422

'skip_download': 'requires avconv',

1423

},

1424

'skip': 'This live event has ended.',

1425

},

1426

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1427

{

1428

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1433

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1434

'duration': 220,

1435

'upload_date': '20150625',

1436

'uploader_id': 'dorappi2000',

1437

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1438

'uploader': 'dorappi2000',

1439

'formats': 'mincount:31',

1440

},

1441

'skip': 'not actual anymore',

1442

},

1443

# DASH manifest with segment_list

1444

{

1445

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1446

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1451

'uploader': 'Airtek',

1452

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1453

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1454

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1455

},

1456

'params': {

1457

'youtube_include_dash_manifest': True,

1458

'format': '135', # bestvideo

1459

},

1460

'skip': 'This live event has ended.',

1461

},

1462

{

1463

# Multifeed videos (multiple cameras), URL is for Main Camera

1464

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1465

'info_dict': {

1466

'id': 'jvGDaLqkpTg',

1467

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1468

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1475

'description': 'md5:e03b909557865076822aa169218d6a5d',

1476

'duration': 10643,

1477

'upload_date': '20161111',

1478

'uploader': 'Team PGP',

1479

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1480

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1487

'description': 'md5:e03b909557865076822aa169218d6a5d',

1488

'duration': 10991,

1489

'upload_date': '20161111',

1490

'uploader': 'Team PGP',

1491

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1492

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1499

'description': 'md5:e03b909557865076822aa169218d6a5d',

1500

'duration': 10995,

1501

'upload_date': '20161111',

1502

'uploader': 'Team PGP',

1503

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1504

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1511

'description': 'md5:e03b909557865076822aa169218d6a5d',

1512

'duration': 10990,

1513

'upload_date': '20161111',

1514

'uploader': 'Team PGP',

1515

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1516

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1521

},

1522

'skip': 'Not multifeed anymore',

1523

},

1524

{

1525

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1526

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1527

'info_dict': {

1528

'id': 'gVfLd0zydlo',

1529

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1530

},

1531

'playlist_count': 2,

1532

'skip': 'Not multifeed anymore',

1533

},

1534

{

1535

'url': 'https://vid.plus/FlRa-iH7PGw',

1536

'only_matching': True,

1537

},

1538

{

1539

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1540

'only_matching': True,

1541

},

1542

{

1543

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1544

# Also tests cut-off URL expansion in video description (see

1545

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1546

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1547

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1552

'alt_title': 'Dark Walk',

1553

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1554

'duration': 133,

1555

'upload_date': '20151119',

1556

'uploader_id': 'IronSoulElf',

1557

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1558

'uploader': 'IronSoulElf',

1559

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1560

'track': 'Dark Walk',

1561

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1562

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1563

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1564

'categories': ['Film & Animation'],

1565

'view_count': int,

1566

'live_status': 'not_live',

1567

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1568

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1569

'tags': 'count:13',

1570

'availability': 'public',

1571

'channel': 'IronSoulElf',

1572

'playable_in_embed': True,

1573

'like_count': int,

1574

'age_limit': 0,

1575

'channel_follower_count': int

1576

},

1577

'params': {

1578

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1583

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1584

'only_matching': True,

1585

},

1586

{

1587

# Video with yt:stretch=17:0

1588

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1593

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1594

'upload_date': '20151107',

1595

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1596

'uploader': 'CH GAMER DROID',

1597

},

1598

'params': {

1599

'skip_download': True,

1600

},

1601

'skip': 'This video does not exist.',

1602

},

1603

{

1604

# Video with incomplete 'yt:stretch=16:'

1605

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1606

'only_matching': True,

1607

},

1608

{

1609

# Video licensed under Creative Commons

1610

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1615

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1616

'duration': 721,

1617

'upload_date': '20150128',

1618

'uploader_id': 'BerkmanCenter',

1619

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1620

'uploader': 'The Berkman Klein Center for Internet & Society',

1621

'license': 'Creative Commons Attribution license (reuse allowed)',

1622

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1623

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1624

'like_count': int,

1625

'age_limit': 0,

1626

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1627

'channel': 'The Berkman Klein Center for Internet & Society',

1628

'availability': 'public',

1629

'view_count': int,

1630

'categories': ['Education'],

1631

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1632

'live_status': 'not_live',

1633

'playable_in_embed': True,

1634

'channel_follower_count': int

1635

},

1636

'params': {

1637

'skip_download': True,

},

},

{

# Channel-like uploader_url

1642

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1647

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1648

'duration': 4060,

1649

'upload_date': '20151120',

1650

'uploader': 'Bernie Sanders',

1651

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1652

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1653

'license': 'Creative Commons Attribution license (reuse allowed)',

1654

'playable_in_embed': True,

1655

'tags': 'count:12',

1656

'like_count': int,

1657

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1658

'age_limit': 0,

1659

'availability': 'public',

1660

'categories': ['News & Politics'],

1661

'channel': 'Bernie Sanders',

1662

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1663

'view_count': int,

1664

'live_status': 'not_live',

1665

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1666

'channel_follower_count': int

1667

},

1668

'params': {

1669

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1674

'only_matching': True,

1675

},

1676

{

1677

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1678

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1679

'only_matching': True,

1680

},

1681

{

1682

# Rental video preview

1683

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1688

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1689

'upload_date': '20150811',

1690

'uploader': 'FlixMatrix',

1691

'uploader_id': 'FlixMatrixKaravan',

1692

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1693

'license': 'Standard YouTube License',

1694

},

1695

'params': {

1696

'skip_download': True,

1697

},

1698

'skip': 'This video is not available.',

1699

},

1700

{

1701

# YouTube Red video with episode data

1702

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1707

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1708

'duration': 2085,

1709

'upload_date': '20170118',

1710

'uploader': 'Vsauce',

1711

'uploader_id': 'Vsauce',

1712

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1713

'series': 'Mind Field',

1714

'season_number': 1,

1715

'episode_number': 1,

1716

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1717

'tags': 'count:12',

1718

'view_count': int,

1719

'availability': 'public',

1720

'age_limit': 0,

1721

'channel': 'Vsauce',

1722

'episode': 'Episode 1',

1723

'categories': ['Entertainment'],

1724

'season': 'Season 1',

1725

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1726

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1727

'like_count': int,

1728

'playable_in_embed': True,

1729

'live_status': 'not_live',

1730

'channel_follower_count': int

1731

},

1732

'params': {

1733

'skip_download': True,

1734

},

1735

'expected_warnings': [

1736

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1741

# as inappropriate or offensive to some audiences.

1742

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1747

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1748

'duration': 965,

1749

'upload_date': '20140124',

1750

'uploader': 'New Century Foundation',

1751

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1752

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1753

},

1754

'params': {

1755

'skip_download': True,

1756

},

1757

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1762

'only_matching': True,

1763

},

1764

{

1765

# geo restricted to JP

1766

'url': 'sJL6WA-aGkQ',

1767

'only_matching': True,

1768

},

1769

{

1770

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1771

'only_matching': True,

1772

},

1773

{

1774

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1775

'only_matching': True,

1776

},

1777

{

1778

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1779

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1780

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1785

'only_matching': True,

1786

},

1787

{

1788

# Video with unsupported adaptive stream type formats

1789

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1794

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1795

'duration': 433,

1796

'upload_date': '20130923',

1797

'uploader': 'Amelia Putri Harwita',

1798

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1799

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1800

'formats': 'maxcount:10',

1801

},

1802

'params': {

1803

'skip_download': True,

1804

'youtube_include_dash_manifest': False,

1805

},

1806

'skip': 'not actual anymore',

1807

},

1808

{

1809

# YouTube Music auto-generated description

1810

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1815

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1816

'upload_date': '20190312',

1817

'uploader': 'Stephen - Topic',

1818

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1819

'artist': 'Stephen',

1820

'track': 'Voyeur Girl',

1821

'album': 'it\'s too much love to know my dear',

1822

'release_date': '20190313',

1823

'release_year': 2019,

1824

'alt_title': 'Voyeur Girl',

1825

'view_count': int,

1826

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1827

'playable_in_embed': True,

1828

'like_count': int,

1829

'categories': ['Music'],

1830

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1831

'channel': 'Stephen',

1832

'availability': 'public',

1833

'creator': 'Stephen',

1834

'duration': 169,

1835

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1836

'age_limit': 0,

1837

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1838

'tags': 'count:11',

1839

'live_status': 'not_live',

1840

'channel_follower_count': int

1841

},

1842

'params': {

1843

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1848

'only_matching': True,

1849

},

1850

{

1851

# invalid -> valid video id redirection

1852

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1857

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1858

'upload_date': '20090125',

1859

'uploader': 'Prochorowka',

1860

'uploader_id': 'Prochorowka',

1861

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1862

'artist': 'Panjabi MC',

1863

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1864

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1865

},

1866

'params': {

1867

'skip_download': True,

1868

},

1869

'skip': 'Video unavailable',

1870

},

1871

{

1872

# empty description results in an empty string

1873

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1880

'uploader_id': 'ElevageOrVert',

1881

'uploader': 'ElevageOrVert',

1882

'view_count': int,

1883

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1884

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1885

'like_count': int,

1886

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1887

'tags': [],

1888

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1889

'availability': 'public',

1890

'age_limit': 0,

1891

'categories': ['Pets & Animals'],

1892

'duration': 7,

1893

'playable_in_embed': True,

1894

'live_status': 'not_live',

1895

'channel': 'ElevageOrVert',

1896

'channel_follower_count': int

1897

},

1898

'params': {

1899

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1904

# see [2] for an example with '};' inside ytInitialPlayerResponse

1905

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1906

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1907

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1912

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1913

'upload_date': '20130831',

1914

'uploader_id': 'kudvenkat',

1915

'uploader': 'kudvenkat',

1916

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1917

'like_count': int,

1918

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1919

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1920

'live_status': 'not_live',

1921

'categories': ['Education'],

1922

'availability': 'public',

1923

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1924

'tags': 'count:12',

1925

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1930

'channel_follower_count': int

1931

},

1932

'params': {

1933

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1938

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1939

'only_matching': True,

1940

},

1941

{

1942

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1943

'only_matching': True,

1944

},

1945

{

1946

# https://github.com/ytdl-org/youtube-dl/pull/28094

1947

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1953

'upload_date': '20141120',

1954

'uploader': 'The Cinematic Orchestra - Topic',

1955

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1956

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1957

'artist': 'The Cinematic Orchestra',

1958

'track': 'Burn Out',

1959

'album': 'Every Day',

1960

'like_count': int,

1961

'live_status': 'not_live',

1962

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1967

'creator': 'The Cinematic Orchestra',

1968

'channel': 'The Cinematic Orchestra',

1969

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1970

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1971

'availability': 'public',

1972

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1973

'categories': ['Music'],

1974

'playable_in_embed': True,

1975

'channel_follower_count': int

1976

},

1977

'params': {

1978

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1983

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1984

'only_matching': True,

1985

},

1986

{

1987

# controversial video, requires bpctr/contentCheckOk

1988

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1993

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1994

'uploader': 'CBS Mornings',

1995

'uploader_id': 'CBSThisMorning',

1996

'upload_date': '20140716',

1997

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1998

'duration': 170,

1999

'categories': ['News & Politics'],

2000

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2001

'view_count': int,

2002

'channel': 'CBS Mornings',

2003

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2004

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2005

'age_limit': 18,

2006

'availability': 'needs_auth',

2007

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2008

'like_count': int,

2009

'live_status': 'not_live',

2010

'playable_in_embed': True,

2011

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2016

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2021

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2022

'upload_date': '20201120',

2023

'uploader': 'Walk around Japan',

2024

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2025

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2026

'duration': 1456,

2027

'categories': ['Travel & Events'],

2028

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2029

'view_count': int,

2030

'channel': 'Walk around Japan',

2031

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2032

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2033

'age_limit': 0,

2034

'availability': 'public',

2035

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2036

'live_status': 'not_live',

2037

'playable_in_embed': True,

2038

'channel_follower_count': int

2039

},

2040

'params': {

2041

'skip_download': True,

2042

},

2043

}, {

2044

# Has multiple audio streams

2045

'url': 'WaOKSUlf4TM',

2046

'only_matching': True

2047

}, {

2048

# Requires Premium: has format 141 when requested using YTM url

2049

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2050

'only_matching': True

2051

}, {

2052

# multiple subtitles with same lang_code

2053

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2054

'only_matching': True,

2055

}, {

2056

# Force use android client fallback

2057

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2058

'info_dict': {

2059

'id': 'YOelRv7fMxY',

2060

'title': 'DIGGING A SECRET TUNNEL Part 1',

2061

'ext': '3gp',

2062

'upload_date': '20210624',

2063

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2064

'uploader': 'colinfurze',

2065

'uploader_id': 'colinfurze',

2066

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2067

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2068

'duration': 596,

2069

'categories': ['Entertainment'],

2070

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2071

'view_count': int,

2072

'channel': 'colinfurze',

2073

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2074

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2075

'age_limit': 0,

2076

'availability': 'public',

2077

'like_count': int,

2078

'live_status': 'not_live',

2079

'playable_in_embed': True,

2080

'channel_follower_count': int

2081

},

2082

'params': {

2083

'format': '17', # 3gp format available on android

2084

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2089

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2090

'only_matching': True,

2091

'params': {

2092

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2097

'only_matching': True,

2098

}, {

2099

'note': 'Storyboards',

2100

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2106

'uploader_id': 'scishow',

2107

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2108

'upload_date': '20140324',

2109

'uploader': 'SciShow',

2110

'like_count': int,

2111

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2112

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2113

'view_count': int,

2114

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2115

'playable_in_embed': True,

2116

'tags': 'count:12',

2117

'uploader_url': 'http://www.youtube.com/user/scishow',

2118

'availability': 'public',

2119

'channel': 'SciShow',

2120

'live_status': 'not_live',

2121

'duration': 248,

2122

'categories': ['Education'],

2123

'age_limit': 0,

2124

'channel_follower_count': int

2125

}, 'params': {'format': 'mhtml', 'skip_download': True}

2126

}, {

2127

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2128

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2133

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2134

'uploader': 'Leon Nguyen',

2135

'uploader_id': 'VNSXIII',

2136

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2137

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2138

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2143

'tags': 'count:23',

2144

'playable_in_embed': True,

2145

'live_status': 'not_live',

2146

'upload_date': '20220103',

2147

'like_count': int,

2148

'availability': 'public',

2149

'channel': 'Leon Nguyen',

2150

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2151

'channel_follower_count': int

2152

}

2153

}, {

2154

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2155

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2160

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2161

'uploader': 'Quackity',

2162

'uploader_id': 'QuackityHQ',

2163

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2164

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2165

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2170

'tags': 'count:26',

2171

'playable_in_embed': True,

2172

'live_status': 'not_live',

2173

'release_timestamp': 1641172509,

2174

'release_date': '20220103',

2175

'upload_date': '20220103',

2176

'like_count': int,

2177

'availability': 'public',

2178

'channel': 'Quackity',

2179

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2180

'channel_follower_count': int

2181

}

2182

},

2183

{ # continuous livestream. Microformat upload date should be preferred.

2184

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2185

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2186

'info_dict': {

2187

'id': 'kgx4WGK0oNU',

2188

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2189

'ext': 'mp4',

2190

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2191

'availability': 'public',

2192

'age_limit': 0,

2193

'release_timestamp': 1637975704,

2194

'upload_date': '20210619',

2195

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2196

'live_status': 'is_live',

2197

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2198

'uploader': '阿鲍Abao',

2199

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2200

'channel': 'Abao in Tokyo',

2201

'channel_follower_count': int,

2202

'release_date': '20211127',

2203

'tags': 'count:39',

2204

'categories': ['People & Blogs'],

2205

'like_count': int,

2206

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2207

'view_count': int,

2208

'playable_in_embed': True,

2209

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2210

},

2211

'params': {'skip_download': True}

2212

}, {

2213

# Story. Requires specific player params to work.

2214

# Note: stories get removed after some period of time

2215

'url': 'https://www.youtube.com/watch?v=yN3x1t3sieA',

'info_dict': {

'id': 'yN3x1t3sieA',

'ext': 'mp4',

'uploader': 'Linus Tech Tips',

2220

'duration': 13,

2221

'channel': 'Linus Tech Tips',

2222

'playable_in_embed': True,

2223

'tags': [],

2224

'age_limit': 0,

2225

'uploader_url': 'http://www.youtube.com/user/LinusTechTips',

2226

'upload_date': '20220402',

2227

'thumbnail': 'https://i.ytimg.com/vi_webp/yN3x1t3sieA/maxresdefault.webp',

2228

'title': 'Story',

2229

'live_status': 'not_live',

2230

'uploader_id': 'LinusTechTips',

2231

'view_count': int,

2232

'description': '',

2233

'channel_id': 'UCXuqSBlHAE6Xw-yeJA0Tunw',

2234

'categories': ['Science & Technology'],

2235

'channel_url': 'https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw',

2236

'availability': 'unlisted',

}

}

]

@classmethod
def suitable(cls, url):
    """Reject URLs whose query has a non-empty `list` value; otherwise defer to the parent class."""
    # Imported inside the method, exactly as the original does
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super().suitable(url)

2249

2250

def __init__(self, *args, **kwargs):
    """Initialise the parent class, then create the two (initially empty) per-instance caches."""
    super().__init__(*args, **kwargs)
    # _player_cache is filled by _decrypt_signature/_decrypt_nsig, _code_cache by _load_player
    self._player_cache = {}
    self._code_cache = {}

2254

2255

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """
    Convert every format flagged `is_from_start` into a generator-driven live DASH format.

    The selected format dicts are modified in place: `is_live` is set, `protocol` becomes
    'http_dash_segments_generator' and `fragments` becomes a partial of
    `_live_dash_fragments` bound to the format id, `live_start_time` and a manifest feed.
    """
    refresh_lock = threading.Lock()

    still_live = True
    last_refresh = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download the player responses once more than `delay` seconds passed since the last refresh"""
        nonlocal formats, last_refresh, still_live
        if time.time() <= last_refresh + delay:
            return

        _, _, player_responses, player_url = self._download_player_responses(
            url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, still_live, _, formats = self._list_formats(
            video_id, microformats, video_details, player_responses, player_url)
        last_refresh = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        # Only the refresh itself is serialised; the lookup below runs outside the lock
        with refresh_lock:
            refetch_manifest(format_id, delay)

        matched = next((fmt for fmt in formats if fmt['format_id'] == format_id), None)
        if matched:
            return matched['manifest_url'], matched['manifest_stream_number'], still_live

        if still_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt['is_live'] = True
        fmt['protocol'] = 'http_dash_segments_generator'
        fmt['fragments'] = functools.partial(
            self._live_dash_fragments, fmt['format_id'], live_start_time, mpd_feed)

2298

2299

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """
    Generator yielding `{'url': ...}` fragment dicts for a live DASH format, oldest first.

    @param format_id        format id passed through to `mpd_feed`
    @param live_start_time  start time of the stream (falsy if unknown); only used to detect
                            streams older than MAX_DURATION seconds
    @param mpd_feed         callable(format_id, delay) returning
                            (manifest_url, manifest_stream_number, is_live) or None
    @param ctx              dict; `ctx.get('start')` is read once and `ctx['last_error']`
                            is popped on every manifest refresh

    The generator ends when the stream is reported as no longer live or when
    `no_fragment_score` exceeds 30.
    """
    # MAX_DURATION is 432000 seconds, i.e. the "120 hours" mentioned in the warning below
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # The helper below returns this variable on its failure paths, so it has to be bound
    # before the first call; otherwise a failed first manifest fetch raises NameError
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """@returns (should_continue, last sequence number)"""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        # A bare next() would leak StopIteration into the enclosing generator, which
        # Python turns into RuntimeError; a missing stream is treated like a failed fetch
        fmt_info = next((x for x in fmts if x['manifest_stream_number'] == stream_number), None)
        if fmt_info is None:
            no_fragment_score += 2
            return False, last_seq
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # The exception is only a way to leave the for loop and restart the while loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Wait out the remainder of FETCH_SPAN seconds before polling again
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2399

2400

def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the player JS URL from the given ytcfg dicts, joined onto https://www.youtube.com.

    Returns None when no ytcfg carries one. `webpage` is accepted but not used here.
    """
    found = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if found:
        return urljoin('https://www.youtube.com', found)
    return None

2407

2408

def _download_player_url(self, video_id, fatal=False):
    """
    Build the player `base.js` URL from the player version found in the iframe API JS.

    Returns None when the download yields nothing or no version is found.
    """
    iframe_api_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_api_js:
        return None

    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_api_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'

2417

2418

def _signature_cache_id(self, example_sig):

2419

""" Return a string representation of a signature """

2420

return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))

2421

2422

@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the `id` group of the first pattern in `cls._PLAYER_INFO_RE` that matches `player_url`.

    Raises ExtractorError when none of the patterns match.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)

2431

2432

def _load_player(self, video_id, player_url, fatal=True):
    """
    Return the player JS code for `player_url`, downloading it only on a cache miss.

    Code is cached in `self._code_cache` under the player id; a failed (falsy)
    download is not cached and results in None.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    player_js = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if not player_js:
        return None
    self._code_cache[player_id] = player_js
    return player_js

2442

2443

def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a callable that transforms an encrypted signature shaped like `example_sig`.

    A cached index list from the 'youtube-sigfuncs' cache section is used when present;
    otherwise the function is parsed from the player JS and its index list is stored.
    Returns None when the player code could not be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        return lambda s: ''.join(s[i] for i in stored_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None

    sig_func = self._parse_sig_js(code)

    # Run a string of distinct characters through the function; the code points of
    # the output are the source indices, which is what gets cached
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func

2464

2465

def _print_sig_code(self, func, example_sig):
    """
    Print Python code equivalent to the signature function `func`.

    Does nothing unless the 'youtube_print_sig_code' param is set.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        """Yield `s[...]` index/slice expressions that together reproduce the index list `idxs`"""
        def _slice_expr(start, end, step):
            head = '' if start == 0 else str(start)
            tail = (':%d' % (end + step)) if end + step >= 0 else ':'
            stride = '' if step == 1 else (':%d' % step)
            return f's[{head}{tail}{stride}]'

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for cur, prev in zip(idxs[1:], idxs[:-1]):
            delta = cur - prev
            if step is not None:
                # A run is in progress: keep going while the stride holds, otherwise emit it
                if delta != step:
                    yield _slice_expr(start, prev, step)
                    step = None
                continue
            if delta in [-1, 1]:
                step, start = delta, prev
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % cur
        else:
            yield _slice_expr(start, cur, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

2506

2507

def _parse_sig_js(self, jscode):
    """
    Find the signature function in the player JS and return a Python callable for it.

    The function name is searched for with the patterns below (tried in order); the
    function is then extracted with JSInterpreter. The returned callable takes the
    signature string and passes it to the JS function as its only argument.
    """
    # Literal parentheses must be written as \( and \); a bare `$` in their place is an
    # end-of-input anchor and makes the pattern unmatchable
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])

2530

2531

def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    try:
        # One extracted function is kept per (player URL, signature layout) pair
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(video_id, player_url, s)
        decrypt = self._player_cache[cache_key]
        self._print_sig_code(decrypt, s)
        return decrypt(s)
    except Exception as e:
        # Any failure is reported as an ExtractorError carrying the formatted traceback
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2543

2544

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    # Already decrypted values are cached by the raw n value
    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        # The extracted n function itself is cached per player URL
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypted = self._player_cache[func_key](s)
        self._player_cache[value_key] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2564

2565

def _extract_n_function_name(self, jscode):
    """
    Return the name of the n-parameter function used by the player JS.

    When the call site references the function through an indexed array
    (`name[idx]`), the array literal assigned to `var name` is parsed and the
    element at `idx` is returned instead.
    """
    # Literal parentheses must be written as \( and \); a bare `$` in their place is an
    # end-of-input anchor and makes the pattern unmatchable
    nfunc, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return nfunc
    return json.loads(js_to_json(self._search_regex(
        rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2574

2575

def _extract_n_function(self, video_id, player_url):
    """
    Return a callable that runs the player's n function on a single string argument.

    The function code is read from the 'youtube-nsig' cache section when available;
    otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    # The JS function is rebuilt from its code on every call of the returned callable
    return lambda s: jsi.extract_function_from_code(*func_code)([s])

2592

2593

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of `ytcfg` is preferred; the player JS is only consulted when
    that yields nothing truthy.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        # Nothing to search in; hand back whatever falsy value ytcfg gave
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))

2616

2617

def _mark_watched(self, video_id, player_responses):
    """
    Request the playback-tracking URL from the player responses with `ver` and `cpn` set.

    Only warns (never raises) when the URL is missing or the request fails.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    parsed = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(parsed.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(parsed._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2642

2643

@staticmethod
def _extract_urls(webpage):
    """
    Return the list of YouTube embed URLs / video ids found in `webpage`.

    Three kinds of markup are searched, in this order: generic player embeds,
    lazyYT embeds and the Wordpress "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    found = [unescapeHTML(m.group('url')) for m in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    found.extend(list(map(unescapeHTML, lazy_ids)))

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    # findall returns one tuple per match; the video id is its last group
    found.extend(groups[-1] for groups in re.findall(importer_re, webpage))

    return found

@staticmethod
def _extract_url(webpage):
    """Return the first entry of `YoutubeIE._extract_urls(webpage)`, or None when there is none."""
    candidates = YoutubeIE._extract_urls(webpage)
    if not candidates:
        return None
    return candidates[0]

2679

2680

@classmethod
def extract_id(cls, url):
    """Return the `id` group of `cls._VALID_URL` matched against `url`; raise ExtractorError on no match."""
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2686

2687

def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar found in `data`."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters')

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        traverse_obj(data, chapters_path, expected_type=list),
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)

2701

2702

def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Build the chapter list from the macro-markers lists in the engagement panels of `data`.

    Returns the chapters of the first list that yields any, or [] when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    # Lists are tried lazily, one at a time, stopping at the first non-empty result
    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2717

2718

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):

2719

chapters = []

2720

last_chapter = {'start_time': 0}

2721

for idx, chapter in enumerate(chapter_list or []):

2722

title = chapter_title(chapter)

2723

start_time = chapter_time(chapter)

2724

if start_time is None:

2725

continue

2726

last_chapter['end_time'] = start_time

2727

if start_time < last_chapter['start_time']:

2728

if idx == 1:

2729

chapters.pop()

2730

self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])

2731

else:

2732

self.report_warning(f'Invalid start time for chapter "{title}"')

2733

continue

2734

last_chapter = {'start_time': start_time, 'title': title}

2735

chapters.append(last_chapter)

2736

last_chapter['end_time'] = duration

2737

return chapters

2738

2739

def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """
    Search `webpage` for the JSON matched by `regex` and return it parsed.

    `regex` followed by the boundary pattern is tried first, then `regex` alone;
    '{}' is parsed when neither matches. Parsing is non-fatal.
    """
    patterns = (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}', regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)

2743

2744

def _extract_comment(self, comment_renderer, parent=None):
    """
    Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. `parent` falls back to 'root'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    body = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author_name = self._get_text(comment_renderer, 'authorText')
    channel_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    raw_votes = try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']),
        compat_str)
    like_count = parse_count(raw_votes) or 0

    avatar_url = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
    by_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)

    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    hearted = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': body,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': like_count,
        'is_favorited': hearted,
        'author': author_name,
        'author_id': channel_id,
        'author_thumbnail': avatar_url,
        'author_is_uploader': by_uploader,
        'parent': parent or 'root'
    }

2778

2779

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following comment continuations.

    Calls itself for reply threads, passing the parent comment id as
    `parent` and sharing the same `tracker` dict of counters. The limits
    are read from the 'max_comments' extractor argument.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Return the continuation of the selected sort order, if any."""
        header_continuation = None
        for content in contents:
            header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index],
                dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            header_continuation = (
                self._extract_continuation_ep_data(sort_continuation_ep)
                or self._extract_continuation(sort_menu_item))
            if not header_continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return header_continuation

    def extract_thread(contents):
        """Yield comments (and their replies) found in `contents`.

        A bare None is yielded once the parent-comment limit is reached;
        the consumer below treats that as the signal to stop.
        """
        if not parent:
            tracker['current_page_thread'] = 0
        counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            reply_entries = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            remaining_replies = max(0, max_replies - tracker['total_reply_comments'])
            yield from itertools.islice(
                reply_entries, min(max_replies_per_thread, remaining_replies))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
        if max_depth == 1 and parent:
            return

    # Missing values are padded with '' and fall back to sys.maxsize
    limit_args = self._configuration_arg('max_comments', ) + [''] * 4
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(arg, default=sys.maxsize) for arg in limit_args)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    page_num = 0
    while continuation:
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=None if is_forced_continuation else 'onResponseReceivedEndpoints')
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first response only provides the header and the sorted continuation
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

        page_num += 1

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

2924

2925

@staticmethod
def _generate_comment_continuation(video_id):
    """
    Build the initial comment section continuation token for a video id.

    The video id is embedded twice in a fixed byte template, which is then
    base64-encoded and returned as text.
    """
    head = '\x12\r\x12\x0b'
    middle = '\x18\x062\'"\x11"\x0b'
    tail = '0\x00x\x020\x00B\x10comments-section'
    raw_token = f'{head}{video_id}{middle}{video_id}{tail}'
    encoded = base64.b64encode(raw_token.encode())
    return encoded.decode()

2932

2933

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over comment dicts, capped by the first value
    of the 'max_comments' extractor argument when one is set.
    """
    def iter_comments():
        # Pick the first item section that is identified as the comment section
        comment_section = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(), 0, comment_limit)

2943

2944

@staticmethod
def _get_checkok_params():
    """Return the request flags 'contentCheckOk' and 'racyCheckOk', both True."""
    return dict(contentCheckOk=True, racyCheckOk=True)

2947

2948

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player request.

    `sts` is added as 'signatureTimestamp' only when it is not None. The
    flags from `_get_checkok_params` are merged in at the top level.
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback_context,
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context

@staticmethod
def _is_agegated(player_response):
    """Tell whether a player response indicates an age-gated video.

    True when 'playabilityStatus' carries a 'desktopLegacyAgeGateReason',
    or when its 'status' or 'reason' contains one of the known age-gate
    markers. Matching is case-insensitive so that the lowercase status
    markers also match upper-case status values.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Normalise case once and ignore anything that is not text
    normalized_reasons = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(
        expected in reason
        for expected in AGE_GATE_REASONS
        for reason in normalized_reasons)

2973

2974

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the response's playability status is exactly 'UNPLAYABLE'."""
    playability = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return playability == 'UNPLAYABLE'

2977

2978

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the 'player' endpoint for `client` and return its response.

    The signature timestamp is only looked up when `player_url` is given.
    A falsy response is returned as None.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB',  # enable stories
        **self._generate_player_context(sts),
    }
    download_note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    player_response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note=download_note)
    return player_response or None

2997

2998

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the 'player_client' extractor argument into client names.

    Accepts explicit client names plus the aliases 'default' and 'all';
    unsupported names are skipped with a warning. Clients whose name
    starts with '_' cannot be requested. For music URLs, the '*_music'
    variant of each requested client is added when such a client exists.
    The result is de-duplicated with `orderedSet`.
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first so the list is not mutated while being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3021

3022

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for every requested client.

    Returns a `(player_responses, player_url)` tuple. Extraction errors
    are reported as warnings, except that the last one is raised when no
    player response could be obtained at all.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    all_clients = set(clients)
    # Reversed so that pop() hands out the clients in their original order
    pending_clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client not in INNERTUBE_CLIENTS:
                continue
            if actual_client not in all_clients:
                pending_clients.append(client_name)
                all_clients.add(actual_client)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending_clients:
        client, base_client, variant = _split_innertube_client(pending_clients.pop())
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3097

3098

def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Yield format dicts built from the given streaming data.

    Direct ('formats' / 'adaptiveFormats') entries are yielded first, then
    formats from the HLS and DASH manifests unless those are skipped via
    the 'skip' extractor argument or the 'youtube_include_*_manifest'
    params. Formats whose signature cannot be decrypted are dropped with a
    warning; formats whose 'n' parameter cannot be decrypted are kept but
    marked as throttled.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # Tolerate an explicit null 'audioQuality'
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # `quality` may still be None when the format reports no quality at all
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Copy so that the appends below never touch a list owned by the caller
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format id and quality to a manifest format.

        Returns False when the same itag was already seen for `proto`.
        """
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Prefer the quality recorded for the itag, then the one for the height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Yield one 'mhtml' storyboard format per storyboard level.

    The spec string is split on '|': the first part is the base URL and
    each remaining part describes one level as 8 '#'-separated fields, of
    which the first five must be non-zero integers (width, height, frame
    count, columns, rows) and the last two are substituted as `$N` and
    `sigh`. Malformed levels are skipped with a warning. Nothing is
    yielded when there is no usable base URL or the duration is unknown.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Fragment durations are derived from the video duration; without it
        # the division below would raise and abort the caller.
        self.write_debug('Skipping storyboards since the video duration is unknown')
        return
    last_level = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(last_level - i)).replace('$N', N) + f'&sigh={sigh}'
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment only covers what is left of the duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3308

3309

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    Returns `(webpage, master_ytcfg, player_responses, player_url)`.
    `webpage` stays None when 'webpage' is listed in the 'player_skip'
    extractor argument.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None if skip_webpage else self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    # Fall back to the default config when none can be read from the page
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3322

3323

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Gather live details, live state, streaming data and formats.

    Returns `(live_broadcast_details, is_live, streaming_data, formats)`.
    `is_live` comes from the video details and falls back to the
    'isLiveNow' field of the live broadcast details when it is None.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    format_iter = self._extract_formats(streaming_data, video_id, player_url, is_live, duration)
    formats = [*format_iter]

    return live_broadcast_details, is_live, streaming_data, formats

3333

3334

def _real_extract(self, url):

3335

url, smuggled_data = unsmuggle_url(url, {})

3336

video_id = self._match_id(url)

3337

3338

base_url = self.http_scheme() + '//www.youtube.com/'

3339

webpage_url = base_url + 'watch?v=' + video_id

3340

3341

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3342

3343

playability_statuses = traverse_obj(

3344

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3345

3346

trailer_video_id = get_first(

3347

playability_statuses,

3348

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3349

expected_type=str)

3350

if trailer_video_id:

3351

return self.url_result(

3352

trailer_video_id, self.ie_key(), trailer_video_id)

3353

3354

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3355

if webpage else (lambda x: None))

3356

3357

video_details = traverse_obj(

3358

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3359

microformats = traverse_obj(

3360

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3361

expected_type=dict, default=[])

3362

video_title = (

3363

get_first(video_details, 'title')

3364

or self._get_text(microformats, (..., 'title'))

3365

or search_meta(['og:title', 'twitter:title', 'title']))

3366

video_description = get_first(video_details, 'shortDescription')

3367

3368

multifeed_metadata_list = get_first(

3369

player_responses,

3370

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3371

expected_type=str)

3372

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3373

if self.get_param('noplaylist'):

3374

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3379

# Unquote should take place before split on comma (,) since textual

3380

# fields may contain comma as well (see

3381

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3382

feed_data = compat_parse_qs(

3383

compat_urllib_parse_unquote_plus(feed))

3384

3385

def feed_entry(name):

3386

return try_get(

3387

feed_data, lambda x: x[name][0], compat_str)

3388

3389

feed_id = feed_entry('id')

3390

if not feed_id:

3391

continue

3392

feed_title = feed_entry('title')

3393

title = video_title

3394

if feed_title:

3395

title += ' (%s)' % feed_title

3396

entries.append({

3397

'_type': 'url_transparent',

3398

'ie_key': 'Youtube',

3399

'url': smuggle_url(

3400

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3401

{'force_singlefeed': True}),

3402

'title': title,

3403

})

3404

feed_ids.append(feed_id)

3405

self.to_screen(

3406

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3407

% (', '.join(feed_ids), video_id))

3408

return self.playlist_result(

3409

entries, video_id, video_title, video_description)

3410

3411

duration = int_or_none(

3412

get_first(video_details, 'lengthSeconds')

3413

or get_first(microformats, 'lengthSeconds')

3414

or parse_duration(search_meta('duration'))) or None

3415

3416

if get_first(video_details, 'isPostLiveDvr'):

3417

self.write_debug('Video is in Post-Live Manifestless mode')

3418

if duration or 0 > 4 * 3600:

3419

self.report_warning(

3420

'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '

3421

'This is a known issue and patches are welcome')

3422

3423

live_broadcast_details, is_live, streaming_data, formats = self._list_formats(

3424

video_id, microformats, video_details, player_responses, player_url, duration)

3425

3426

if not formats:

3427

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3428

self.report_drm(video_id)

3429

pemr = get_first(

3430

playability_statuses,

3431

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3432

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3433

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3434

if subreason:

3435

if subreason == 'The uploader has not made this video available in your country.':

3436

countries = get_first(microformats, 'availableCountries')

3437

if not countries:

3438

regions_allowed = search_meta('regionsAllowed')

3439

countries = regions_allowed.split(',') if regions_allowed else None

3440

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3441

reason += f'. {subreason}'

3442

if reason:

3443

self.raise_no_formats(reason, expected=True)

3444

3445

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3446

if not keywords and webpage:

3447

keywords = [

3448

unescapeHTML(m.group('content'))

3449

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3450

for keyword in keywords:

3451

if keyword.startswith('yt:stretch='):

3452

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3453

if mobj:

3454

# NB: float is intentional for forcing float division

3455

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3460

f['stretched_ratio'] = ratio

3461

break

3462

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3463

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3464

if thumbnail_url:

3465

thumbnails.append({

3466

'url': thumbnail_url,

3467

})

3468

original_thumbnails = thumbnails.copy()

3469

3470

# The best resolution thumbnails sometimes does not appear in the webpage

3471

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3472

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3473

thumbnail_names = [

3474

# While the *1,*2,*3 thumbnails are just below their correspnding "*default" variants

3475

# in resolution, these are not the custom thumbnail. So de-prioritize them

3476

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3477

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3478

]

3479

n_thumbnail_names = len(thumbnail_names)

3480

thumbnails.extend({

3481

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3482

video_id=video_id, name=name, ext=ext,

3483

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3484

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3485

for thumb in thumbnails:

3486

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3487

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3488

self._remove_duplicate_formats(thumbnails)

3489

self._downloader._sort_thumbnails(original_thumbnails)

3490

3491

category = get_first(microformats, 'category') or search_meta('genre')

3492

channel_id = str_or_none(

3493

get_first(video_details, 'channelId')

3494

or get_first(microformats, 'externalChannelId')

3495

or search_meta('channelId'))

3496

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3497

3498

live_content = get_first(video_details, 'isLiveContent')

3499

is_upcoming = get_first(video_details, 'isUpcoming')

3500

if is_live is None:

3501

if is_upcoming or live_content is False:

3502

is_live = False

3503

if is_upcoming is None and (live_content or is_live):

3504

is_upcoming = False

3505

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3506

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3507

if not duration and live_end_time and live_start_time:

3508

duration = live_end_time - live_start_time

3509

3510

if is_live and self.get_param('live_from_start'):

3511

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3512

3513

formats.extend(self._extract_storyboard(player_responses, duration))

3514

3515

# Source is given priority since formats that throttle are given lower source_preference

3516

# When throttling issue is fully fixed, remove this

3517

self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3522

'formats': formats,

3523

'thumbnails': thumbnails,

3524

# The best thumbnail that we are sure exists. Prevents unnecessary

3525

# URL checking if user don't care about getting the best possible thumbnail

3526

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3527

'description': video_description,

3528

'uploader': get_first(video_details, 'author'),

3529

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3530

'uploader_url': owner_profile_url,

3531

'channel_id': channel_id,

3532

'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),

3533

'duration': duration,

3534

'view_count': int_or_none(

3535

get_first((video_details, microformats), (..., 'viewCount'))

3536

or search_meta('interactionCount')),

3537

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3538

'age_limit': 18 if (

3539

get_first(microformats, 'isFamilySafe') is False

3540

or search_meta('isFamilyFriendly') == 'false'

3541

or search_meta('og:restrictions:age') == '18+') else 0,

3542

'webpage_url': webpage_url,

3543

'categories': [category] if category else None,

3544

'tags': keywords,

3545

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3546

'is_live': is_live,

3547

'was_live': (False if is_live or is_upcoming or live_content is False

3548

else None if is_live is None or is_upcoming is None

3549

else live_content),

3550

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3551

'release_timestamp': live_start_time,

3552

}

3553

3554

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3555

if pctr:

3556

def get_lang_code(track):

3557

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3558

or track.get('languageCode'))

3559

3560

# Converted into dicts to remove duplicates

3561

captions = {

3562

get_lang_code(sub): sub

3563

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3564

translation_languages = {

3565

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3566

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3567

3568

def process_language(container, base_url, lang_code, sub_name, query):

3569

lang_subs = container.setdefault(lang_code, [])

3570

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

subtitles, automatic_captions = {}, {}

3581

for lang_code, caption_track in captions.items():

3582

base_url = caption_track.get('baseUrl')

3583

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3584

if not base_url:

3585

continue

3586

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3587

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3592

if not caption_track.get('isTranslatable'):

3593

continue

3594

for trans_code, trans_name in translation_languages.items():

3595

if not trans_code:

3596

continue

3597

orig_trans_code = trans_code

3598

if caption_track.get('kind') != 'asr':

3599

if 'translated_subs' in self._configuration_arg('skip'):

3600

continue

3601

trans_code += f'-{lang_code}'

3602

trans_name += format_field(lang_name, template=' from %s')

3603

# Add an "-orig" label to the original language so that it can be distinguished.

3604

# The subs are returned without "-orig" as well for compatibility

3605

if lang_code == f'a-{orig_trans_code}':

3606

process_language(

3607

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3608

# Setting tlang=lang returns damaged subtitles.

3609

process_language(automatic_captions, base_url, trans_code, trans_name,

3610

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3611

info['automatic_captions'] = automatic_captions

3612

info['subtitles'] = subtitles

3613

3614

parsed_url = compat_urllib_parse_urlparse(url)

3615

for component in [parsed_url.fragment, parsed_url.query]:

3616

query = compat_parse_qs(component)

3617

for k, v in query.items():

3618

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3619

d_k += '_time'

3620

if d_k not in info and k in s_ks:

3621

info[d_k] = parse_duration(query[k][0])

3622

3623

# Youtube Music Auto-generated description

3624

if video_description:

3625

mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)

3626

if mobj:

3627

release_year = mobj.group('release_year')

3628

release_date = mobj.group('release_date')

3629

if release_date:

3630

release_date = release_date.replace('-', '')

3631

if not release_year:

3632

release_year = release_date[:4]

3633

info.update({

3634

'album': mobj.group('album'.strip()),

3635

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3636

'track': mobj.group('track').strip(),

3637

'release_date': release_date,

3638

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self._extract_yt_initial_variable(

3644

webpage, self._YT_INITIAL_DATA_RE, video_id,

3645

'yt initial data')

3646

if not initial_data:

3647

query = {'videoId': video_id}

3648

query.update(self._get_checkok_params())

3649

initial_data = self._extract_response(

3650

item_id=video_id, ep='next', fatal=False,

3651

ytcfg=master_ytcfg, query=query,

3652

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3653

note='Downloading initial data API JSON')

3654

3655

try: # This will error if there is no livechat

3656

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3657

except (KeyError, IndexError, TypeError):

3658

pass

3659

else:

3660

info.setdefault('subtitles', {})['live_chat'] = [{

3661

'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies

3662

'video_id': video_id,

3663

'ext': 'json',

3664

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3670

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3671

or None)

3672

3673

contents = traverse_obj(

3674

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3675

expected_type=list, default=[])

3676

3677

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3678

if vpir:

3679

stl = vpir.get('superTitleLink')

3680

if stl:

3681

stl = self._get_text(stl)

3682

if try_get(

3683

vpir,

3684

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3685

info['location'] = stl

3686

else:

3687

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

3688

if mobj:

3689

info.update({

3690

'series': mobj.group(1),

3691

'season_number': int(mobj.group(2)),

3692

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3697

list) or []):

3698

tbr = tlb.get('toggleButtonRenderer') or {}

3699

for getter, regex in [(

3700

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3701

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3702

lambda x: x['accessibility'],

3703

lambda x: x['accessibilityData']['accessibilityData'],

3704

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3705

label = (try_get(tbr, getter, dict) or {}).get('label')

3706

if label:

3707

mobj = re.match(regex, label)

3708

if mobj:

3709

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3710

break

3711

sbr_tooltip = try_get(

3712

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3713

if sbr_tooltip:

3714

like_count, dislike_count = sbr_tooltip.split(' / ')

3715

info.update({

3716

'like_count': str_to_int(like_count),

3717

'dislike_count': str_to_int(dislike_count),

3718

})

3719

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3720

if vsir:

3721

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3722

info.update({

3723

'channel': self._get_text(vor, 'title'),

3724

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3729

list) or []

3730

multiple_songs = False

3731

for row in rows:

3732

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3733

multiple_songs = True

3734

break

3735

for row in rows:

3736

mrr = row.get('metadataRowRenderer') or {}

3737

mrr_title = mrr.get('title')

3738

if not mrr_title:

3739

continue

3740

mrr_title = self._get_text(mrr, 'title')

3741

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3742

if mrr_title == 'License':

3743

info['license'] = mrr_contents_text

3744

elif not multiple_songs:

3745

if mrr_title == 'Album':

3746

info['album'] = mrr_contents_text

3747

elif mrr_title == 'Artist':

3748

info['artist'] = mrr_contents_text

3749

elif mrr_title == 'Song':

3750

info['track'] = mrr_contents_text

3751

3752

fallbacks = {

3753

'channel': 'uploader',

3754

'channel_id': 'uploader_id',

3755

'channel_url': 'uploader_url',

3756

}

3757

3758

# The upload date for scheduled, live and past live streams / premieres in microformats

3759

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3760

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3761

upload_date = (

3762

unified_strdate(get_first(microformats, 'uploadDate'))

3763

or unified_strdate(search_meta('uploadDate')))

3764

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3765

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date

3766

info['upload_date'] = upload_date

3767

3768

for to, frm in fallbacks.items():

3769

if not info.get(to):

3770

info[to] = info.get(frm)

3771

3772

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3778

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3779

is_membersonly = None

3780

is_premium = None

3781

if initial_data and is_private is not None:

3782

is_membersonly = False

3783

is_premium = False

3784

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3785

badge_labels = set()

3786

for content in contents:

3787

if not isinstance(content, dict):

3788

continue

3789

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3790

for badge_label in badge_labels:

3791

if badge_label.lower() == 'members only':

3792

is_membersonly = True

3793

elif badge_label.lower() == 'premium':

3794

is_premium = True

3795

elif badge_label.lower() == 'unlisted':

3796

is_unlisted = True

3797

3798

info['availability'] = self._availability(

3799

is_private=is_private,

3800

needs_premium=is_premium,

3801

needs_subscription=is_membersonly,

3802

needs_auth=info['age_limit'] >= 18,

3803

is_unlisted=None if is_private is None else is_unlisted)

3804

3805

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3806

3807

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3813

3814

@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that forwards smuggled URL data to the entries a method returns.

    The decorated method is invoked as ``func(self, url, smuggled_data)`` with
    the unsmuggled URL. ``is_music_url`` is set in the smuggled data when
    ``self.is_music_url(url)`` is true. If the smuggled data is non-empty and
    the returned dict has truthy ``entries``, those entries are replaced by a
    generator that smuggles the data into the ``url`` of every entry.
    """

    def _with_smuggled_urls(entries, payload):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        plain_url, payload = unsmuggle_url(url, {})
        if self.is_music_url(plain_url):
            payload['is_music_url'] = True
        result = func(self, plain_url, payload)
        if payload and result.get('entries'):
            result['entries'] = _with_smuggled_urls(result['entries'], payload)
        return result

    return wrapper

def _extract_channel_id(self, webpage):

3835

channel_id = self._html_search_meta(

3836

'channelId', webpage, 'channel id', default=None)

3837

if channel_id:

3838

return channel_id

3839

channel_url = self._html_search_meta(

3840

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

3841

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

3842

'twitter:app:url:googleplay'), webpage, 'channel url')

3843

return self._search_regex(

3844

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

3845

channel_url, 'channel id')

3846

3847

@staticmethod

3848

def _extract_basic_item_renderer(item):

3849

# Modified from _extract_grid_item_renderer

3850

known_basic_renderers = (

3851

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

3852

)

3853

for key, renderer in item.items():

3854

if not isinstance(renderer, dict):

3855

continue

3856

elif key in known_basic_renderers:

3857

return renderer

3858

elif key.startswith('grid') and key.endswith('Renderer'):

3859

return renderer

3860

3861

def _grid_entries(self, grid_renderer):
    """Yield one result per recognised item in ``grid_renderer['items']``.

    Each item is resolved to its basic renderer and then handled, in order of
    precedence, as a playlist, a video, a channel, or a generic endpoint URL
    that one of the YouTube extractors accepts. Other items are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        basic = self._extract_basic_item_renderer(grid_item)
        if not isinstance(basic, dict):
            continue
        title = self._get_text(basic, 'title')
        playlist_id = basic.get('playlistId')
        video_id = basic.get('videoId')
        channel_id = basic.get('channelId')

        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(basic)
        elif channel_id:
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                basic, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url), video_title=title)

3900

3901

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com url result for a responsive list item.

    Checked in order: the item's own video id, the playlist (optionally with
    a video) of its watch endpoint, then the id of its browse endpoint.
    Returns None when none of these is present.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={item_video_id}',
                               ie=YoutubeIE.ie_key(), video_id=item_video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            target = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)

3918

3919

def _shelf_entries_from_content(self, shelf_renderer):

3920

content = shelf_renderer.get('content')

3921

if not isinstance(content, dict):

3922

return

3923

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3924

if renderer:

3925

# TODO: add support for nested playlists so each shelf is processed

3926

# as separate playlist

3927

# TODO: this includes only first N items

3928

yield from self._grid_entries(renderer)

3929

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of a shelf: its own URL first, then its inline content.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3949

3950

def _playlist_entries(self, video_list_renderer):

3951

for content in video_list_renderer['contents']:

3952

if not isinstance(content, dict):

3953

continue

3954

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

3955

if not isinstance(renderer, dict):

3956

continue

3957

video_id = renderer.get('videoId')

3958

if not video_id:

3959

continue

3960

yield self._extract_video(renderer)

3961

3962

def _rich_entries(self, rich_grid_renderer):
    """Yield the video found at ``content.videoRenderer``, if it has a ``videoId``."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

3969

3970

def _video_entry(self, video_renderer):

3971

video_id = video_renderer.get('videoId')

3972

if video_id:

3973

return self._extract_video(video_renderer)

3974

3975

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url result for the tap target of a hashtag tile, or None without a URL."""
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_path)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

3981

3982

def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a backstage post.

    In order: the attached video, the attached playlist, then every link in
    the post text that ``YoutubeIE`` accepts and that does not point to the
    attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        ep_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video was already yielded above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)

4017

4018

def _post_thread_continuation_entries(self, post_thread_continuation):

4019

contents = post_thread_continuation.get('contents')

4020

if not isinstance(contents, list):

4021

return

4022

for content in contents:

4023

renderer = content.get('backstagePostThreadRenderer')

4024

if not isinstance(renderer, dict):

4025

continue

4026

yield from self._post_thread_entries(renderer)

4027

4028

r''' # unused

4029

def _rich_grid_entries(self, contents):

4030

for content in contents:

4031

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4032

if video_renderer:

4033

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in the ``contents`` of ``parent_renderer``.

    ``continuation_list`` is an output parameter: it is reset to ``[None]``
    and its only element is overwritten with the result of
    ``self._extract_continuation`` as renderers are processed.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Renderer key -> callable returning an iterable of entries for its value
    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for section in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(section, dict):
            continue
        is_renderer = traverse_obj(
            section, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            rich_item = section.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for isr_content in try_get(is_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(isr_content, dict):
                continue
            # Only the first known renderer of each item is processed
            known_key = next((key for key in isr_content if key in handlers), None)
            if known_key is None:
                continue
            renderer = isr_content[known_key]
            for entry in handlers[known_key](renderer):
                if entry:
                    yield entry
            continuation_list[0] = self._extract_continuation(renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4086

4087

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of ``tab``, following continuations page by page.

    The entries embedded in the tab are yielded first. Afterwards, for as
    long as a continuation is available, the next page is requested and
    handled either through its ``continuationContents`` or through the
    continuation items of its first received action/endpoint. Iteration
    stops on an empty response or when a page has no known renderer.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # continuation_list is looked up when this is called, so the rebinding
        # done before each handler call below is visible here
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Key of 'continuationContents' -> callable producing the entries
    contents_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Key of the first continuation item -> (callable, key the items are wrapped under)
    item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        matched_key = next(
            (key for key in continuation_contents if key in contents_handlers), None)
        if matched_key is not None:
            continuation_renderer = continuation_contents[matched_key]
            continuation_list = [None]
            yield from contents_handlers[matched_key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            # A falsy renderer falls through to the continuation items below
            if continuation_renderer:
                continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        matched_key = next((key for key in first_item if key in item_handlers), None)
        if matched_key is None:
            break
        handler, items_key = item_handlers[matched_key]
        video_items_renderer = {items_key: continuation_items}
        continuation_list = [None]
        yield from handler(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the renderer of the tab whose ``selected`` flag is exactly True.

    Raises ExtractorError when no tab is selected and ``fatal`` is set;
    otherwise returns None in that case.
    """
    for tab in tabs:
        candidate = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if candidate.get('selected') is True:
            return candidate
    if not fatal:
        return None
    raise ExtractorError('Unable to find selected tab')

4171

4172

def _extract_uploader(self, data):
    """Return the uploader fields read from the playlist's secondary sidebar.

    The result holds ``uploader``, ``uploader_id`` and ``uploader_url`` for
    the first title run of the video owner; fields that are None are left
    out. An empty dict is returned when no owner is found.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    found = {
        # Strips the "by ... and N others" wrapping when it is present
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {key: value for key, value in found.items() if value is not None}

4187

4188

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a channel/playlist page.

    Metadata comes from the channel or playlist metadata renderer, the
    primary sidebar and the header of ``data``; the entries come from
    ``self._entries`` for the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    title = description = playlist_id = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly just updated) uploader fields
    for suffix in ('', '_id', '_url'):
        metadata['channel' + suffix] = metadata['uploader' + suffix]

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4279

4280

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline (watch page) playlist, page by page.

    Each page is read with ``self._playlist_entries``. Videos up to and
    including the last one already yielded are skipped, and the next page is
    requested from the ``next`` endpoint. Iteration stops when a page has no
    videos, has nothing new after the last yielded video, or when the
    response carries no playlist.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video that was already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item may lack a watch endpoint; fall back to the defaults below
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing more to read; indexing a missing playlist would raise
            return

4310

4311

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist found in a response.

    If the playlist links to a URL different from `url` and its ID is not one of
    the known unviewable ones, a url_result for YoutubeTabIE pointing at that URL
    is returned. Otherwise the playlist is extracted inline.

    @param item_id: fallback playlist ID when `playlist` has no 'playlistId'
    @param url: URL being extracted; base for resolving the playlist link
    @param data: response; fallback source of the title
    @param playlist: playlist renderer
    @param ytcfg: passed through to the inline playlist extraction
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    if not playlist_url or playlist_url == url or is_known_unviewable:
        inline_entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(
            inline_entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4333

4334

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    The labels considered are the badges of the primary sidebar info renderer plus,
    if present, the label of the selected entry of the privacy dropdown.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            labels.add(selected_label.lower())
            break

    # None means "unknown"; only an explicit label changes a flag
    is_private = is_unlisted = None
    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)

4365

4366

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` entry among the playlist sidebar items.

    @param data: response
    @param info_renderer: key looked up in each sidebar item
    @param expected_type: type the entry has to have (passed to try_get)
    @returns the matching renderer, or None if there is none
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    # Lazily evaluated, so lookup stops at the first item that has the renderer
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with unavailable videos included.

    Returns None without any request when `data` has no primary sidebar info renderer.
    Otherwise the browse endpoint of the 'show unavailable videos' menu item is used
    if that item exists; missing values fall back to default params and 'VL' + item_id.
    The request is non-fatal.

    @param item_id: playlist ID; used for the request and the fallback browseId
    @param data: response; source of the sidebar, account syncid and visitor data
    @param ytcfg: passed through to header generation and to the API request
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_item = entry.get('menuNavigationItemRenderer')
        label = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4410

4411

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is among the values of the `skip` extractor argument of YoutubeTabIE"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped

4414

4415

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying when needed.

    An attempt is repeated (up to the `extractor_retries` param, default 3) when
    the download fails with a network error other than HTTP 403/429, or when the
    initial data has none of the expected top-level keys.
    Any other failure, or running out of retries, raises if `fatal` and is
    reported as a warning otherwise.

    @param url: URL of the webpage
    @param item_id: ID used when downloading and extracting
    @param fatal: whether failures raise instead of only being reported
    @returns (webpage, data) as obtained by the last attempt that produced them
    """
    retries = self.get_param('extractor_retries', 3)
    webpage = data = last_error = None
    count = -1
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_note = ' (retry #%d)' % count if count else ''
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (retry_note,))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Network errors are worth retrying, except for HTTP 403 and 429
            is_retriable = isinstance(cause, network_exceptions) and not (
                isinstance(cause, compat_HTTPError) and cause.code in (403, 429))
            if is_retriable:
                last_error = error_to_compat_str(cause or e.msg)
                if count < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Only reached when the download and extraction above succeeded
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
            break

        last_error = 'Incomplete yt initial data received'
        if count < retries:
            continue
        if fatal:
            raise ExtractorError(last_error)
        self.report_warning(last_error)
        break

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):

4462

"""Use if failed to extract ytcfg (and data) from initial webpage"""

4463

if not ytcfg and self.is_authenticated:

4464

msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'

4465

if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:

4466

raise ExtractorError(

4467

f'{msg}. If you are not downloading private content, or '

4468

'your cookies are only for the first account and channel,'

4469

' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',

4470

expected=True)

4471

self.report_warning(msg, only_once=True)

4472

4473

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data for `url`, from the webpage if possible and from the API otherwise.

    Unless the webpage is skipped, it is downloaded and both data and (if not
    given) ytcfg are extracted from it. When no data was obtained that way, the
    playlist authcheck is run and the data is requested through the API instead.

    @param url: URL to extract
    @param item_id: ID used for the requests
    @param ytcfg: existing ytcfg; extracted from the webpage when falsy
    @param fatal: whether a home page redirect and API failures raise
    @param webpage_fatal: whether webpage extraction failures raise
    @param default_client: client passed to the API fallback
    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)
        # Detect a redirect to the home page that was not explicitly requested
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4492

4493

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the API and fetch the response of the endpoint it maps to.

    The URL is resolved with 'navigation/resolve_url'. A resolved browseEndpoint is
    requested from 'browse', a watchEndpoint from 'next' (checked in that order).
    If neither is present, raises when `fatal` and warns otherwise.

    @param url: URL to resolve
    @param item_id: ID used for the requests and the warning
    @param ytcfg: passed through to header generation and to the API requests
    @param fatal: whether failures raise instead of only being reported
    @param default_client: client used for headers and requests
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

4510

4511

# Default 'params' value of the search query; used by _search_results when it is
# called without explicit params. A falsy value means no 'params' are sent.
_SEARCH_PARAMS = None

4512

4513

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, following continuations page by page.

    @param query: the search query
    @param params: 'params' value of the request; defaults to self._SEARCH_PARAMS
                   and is omitted from the request when falsy
    @param default_client: client used for ytcfg, headers and requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Paths at which the result items may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Unique top-level keys of the paths above
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-element list that _extract_entries updates with the next continuation
    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4548

IE_DESC = 'YouTube Tabs'

4549

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4558

(?P<not_channel>

4559

feed/|hashtag/|

4560

(?:playlist|watch)\?.*?\blist=

4561

)|

4562

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4567

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4568

}

4569

IE_NAME = 'youtube:tab'

4570

4571

_TESTS = [{

4572

'note': 'playlists, multipage',

4573

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4574

'playlist_mincount': 94,

4575

'info_dict': {

4576

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4577

'title': 'Igor Kleiner - Playlists',

4578

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4579

'uploader': 'Igor Kleiner',

4580

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4581

'channel': 'Igor Kleiner',

4582

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4583

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4584

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4585

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4586

'channel_follower_count': int

4587

},

4588

}, {

4589

'note': 'playlists, multipage, different order',

4590

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4591

'playlist_mincount': 94,

4592

'info_dict': {

4593

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4594

'title': 'Igor Kleiner - Playlists',

4595

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4596

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4597

'uploader': 'Igor Kleiner',

4598

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4599

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4600

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4601

'channel': 'Igor Kleiner',

4602

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4603

'channel_follower_count': int

4604

},

4605

}, {

4606

'note': 'playlists, series',

4607

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4608

'playlist_mincount': 5,

4609

'info_dict': {

4610

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4611

'title': '3Blue1Brown - Playlists',

4612

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4613

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4614

'uploader': '3Blue1Brown',

4615

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4616

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4617

'channel': '3Blue1Brown',

4618

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4619

'tags': ['Mathematics'],

4620

'channel_follower_count': int

4621

},

4622

}, {

4623

'note': 'playlists, singlepage',

4624

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4625

'playlist_mincount': 4,

4626

'info_dict': {

4627

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4628

'title': 'ThirstForScience - Playlists',

4629

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4630

'uploader': 'ThirstForScience',

4631

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4632

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4633

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4634

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4635

'tags': 'count:13',

4636

'channel': 'ThirstForScience',

4637

'channel_follower_count': int

4638

}

4639

}, {

4640

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4641

'only_matching': True,

4642

}, {

4643

'note': 'basic, single video playlist',

4644

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4645

'info_dict': {

4646

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4647

'uploader': 'Sergey M.',

4648

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4649

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4654

'channel': 'Sergey M.',

4655

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4656

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4657

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4662

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4663

'info_dict': {

4664

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4665

'uploader': 'Sergey M.',

4666

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4667

'title': 'youtube-dl empty playlist',

4668

'tags': [],

4669

'channel': 'Sergey M.',

4670

'description': '',

4671

'modified_date': '20160902',

4672

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4673

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4674

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4680

'info_dict': {

4681

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4682

'title': 'lex will - Home',

4683

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4684

'uploader': 'lex will',

4685

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4686

'channel': 'lex will',

4687

'tags': ['bible', 'history', 'prophesy'],

4688

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4689

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4690

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4691

'channel_follower_count': int

4692

},

4693

'playlist_mincount': 2,

4694

}, {

4695

'note': 'Videos tab',

4696

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4697

'info_dict': {

4698

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4699

'title': 'lex will - Videos',

4700

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4701

'uploader': 'lex will',

4702

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4703

'tags': ['bible', 'history', 'prophesy'],

4704

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4705

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4706

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4707

'channel': 'lex will',

4708

'channel_follower_count': int

4709

},

4710

'playlist_mincount': 975,

4711

}, {

4712

'note': 'Videos tab, sorted by popular',

4713

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4714

'info_dict': {

4715

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4716

'title': 'lex will - Videos',

4717

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4718

'uploader': 'lex will',

4719

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4720

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4721

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4722

'channel': 'lex will',

4723

'tags': ['bible', 'history', 'prophesy'],

4724

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4725

'channel_follower_count': int

4726

},

4727

'playlist_mincount': 199,

4728

}, {

4729

'note': 'Playlists tab',

4730

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4731

'info_dict': {

4732

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4733

'title': 'lex will - Playlists',

4734

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4735

'uploader': 'lex will',

4736

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4737

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4738

'channel': 'lex will',

4739

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4740

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4741

'tags': ['bible', 'history', 'prophesy'],

4742

'channel_follower_count': int

4743

},

4744

'playlist_mincount': 17,

4745

}, {

4746

'note': 'Community tab',

4747

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4748

'info_dict': {

4749

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4750

'title': 'lex will - Community',

4751

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4752

'uploader': 'lex will',

4753

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4754

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4755

'channel': 'lex will',

4756

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4757

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4758

'tags': ['bible', 'history', 'prophesy'],

4759

'channel_follower_count': int

4760

},

4761

'playlist_mincount': 18,

4762

}, {

4763

'note': 'Channels tab',

4764

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4765

'info_dict': {

4766

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4767

'title': 'lex will - Channels',

4768

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4769

'uploader': 'lex will',

4770

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4771

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4772

'channel': 'lex will',

4773

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4774

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4775

'tags': ['bible', 'history', 'prophesy'],

4776

'channel_follower_count': int

4777

},

4778

'playlist_mincount': 12,

4779

}, {

4780

'note': 'Search tab',

4781

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4782

'playlist_mincount': 40,

4783

'info_dict': {

4784

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4785

'title': '3Blue1Brown - Search - linear algebra',

4786

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4787

'uploader': '3Blue1Brown',

4788

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4789

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4790

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4791

'tags': ['Mathematics'],

4792

'channel': '3Blue1Brown',

4793

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4794

'channel_follower_count': int

4795

},

4796

}, {

4797

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4798

'only_matching': True,

4799

}, {

4800

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4801

'only_matching': True,

4802

}, {

4803

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4804

'only_matching': True,

4805

}, {

4806

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4807

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4808

'info_dict': {

4809

'title': '29C3: Not my department',

4810

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4811

'uploader': 'Christiaan008',

4812

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4813

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4814

'tags': [],

4815

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4816

'view_count': int,

4817

'modified_date': '20150605',

4818

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4819

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4820

'channel': 'Christiaan008',

4821

},

4822

'playlist_count': 96,

4823

}, {

4824

'note': 'Large playlist',

4825

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4826

'info_dict': {

4827

'title': 'Uploads from Cauchemar',

4828

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4829

'uploader': 'Cauchemar',

4830

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4831

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4832

'tags': [],

4833

'modified_date': r're:\d{8}',

4834

'channel': 'Cauchemar',

4835

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4836

'view_count': int,

4837

'description': '',

4838

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4839

},

4840

'playlist_mincount': 1123,

4841

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4842

}, {

4843

'note': 'even larger playlist, 8832 videos',

4844

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4845

'only_matching': True,

4846

}, {

4847

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4848

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4849

'info_dict': {

4850

'title': 'Uploads from Interstellar Movie',

4851

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4852

'uploader': 'Interstellar Movie',

4853

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4854

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4855

'tags': [],

4856

'view_count': int,

4857

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4858

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4859

'channel': 'Interstellar Movie',

4860

'description': '',

4861

'modified_date': r're:\d{8}',

4862

},

4863

'playlist_mincount': 21,

4864

}, {

4865

'note': 'Playlist with "show unavailable videos" button',

4866

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4867

'info_dict': {

4868

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4869

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4870

'uploader': 'Phim Siêu Nhân Nhật Bản',

4871

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4872

'view_count': int,

4873

'channel': 'Phim Siêu Nhân Nhật Bản',

4874

'tags': [],

4875

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4876

'description': '',

4877

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4878

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4879

'modified_date': r're:\d{8}',

4880

},

4881

'playlist_mincount': 200,

4882

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4883

}, {

4884

'note': 'Playlist with unavailable videos in page 7',

4885

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4886

'info_dict': {

4887

'title': 'Uploads from BlankTV',

4888

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4889

'uploader': 'BlankTV',

4890

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4891

'channel': 'BlankTV',

4892

'channel_url': 'https://www.youtube.com/c/blanktv',

4893

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4894

'view_count': int,

4895

'tags': [],

4896

'uploader_url': 'https://www.youtube.com/c/blanktv',

4897

'modified_date': r're:\d{8}',

4898

'description': '',

4899

},

4900

'playlist_mincount': 1000,

4901

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4902

}, {

4903

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4904

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4905

'info_dict': {

4906

'title': 'Data Analysis with Dr Mike Pound',

4907

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4908

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4909

'uploader': 'Computerphile',

4910

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4911

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4912

'tags': [],

4913

'view_count': int,

4914

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4915

'channel_url': 'https://www.youtube.com/user/Computerphile',

4916

'channel': 'Computerphile',

4917

},

4918

'playlist_mincount': 11,

4919

}, {

4920

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4921

'only_matching': True,

4922

}, {

4923

'note': 'Playlist URL that does not actually serve a playlist',

4924

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4929

'uploader': 'STREEM',

4930

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4931

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4932

'upload_date': '20150526',

4933

'license': 'Standard YouTube License',

4934

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4935

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4942

},

4943

'skip': 'This video is not available.',

4944

'add_ie': [YoutubeIE.ie_key()],

4945

}, {

4946

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4947

'only_matching': True,

4948

}, {

4949

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

4950

'only_matching': True,

4951

}, {

4952

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

4953

'info_dict': {

4954

'id': 'GgL890LIznQ', # This will keep changing

4955

'ext': 'mp4',

4956

'title': str,

4957

'uploader': 'Sky News',

4958

'uploader_id': 'skynews',

4959

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

4960

'upload_date': r're:\d{8}',

4961

'description': str,

4962

'categories': ['News & Politics'],

4963

'tags': list,

4964

'like_count': int,

4965

'release_timestamp': 1642502819,

4966

'channel': 'Sky News',

4967

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

4968

'age_limit': 0,

4969

'view_count': int,

4970

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

4971

'playable_in_embed': True,

4972

'release_date': '20220118',

4973

'availability': 'public',

4974

'live_status': 'is_live',

4975

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

4976

'channel_follower_count': int

4977

},

4978

'params': {

4979

'skip_download': True,

4980

},

4981

'expected_warnings': ['Ignoring subtitle tracks found in '],

4982

}, {

4983

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

4988

'uploader': 'The Young Turks',

4989

'uploader_id': 'TheYoungTurks',

4990

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

4991

'upload_date': '20150715',

4992

'license': 'Standard YouTube License',

4993

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

4994

'categories': ['News & Politics'],

4995

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5000

},

5001

'only_matching': True,

5002

}, {

5003

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5004

'only_matching': True,

5005

}, {

5006

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5007

'only_matching': True,

5008

}, {

5009

'note': 'A channel that is not live. Should raise error',

5010

'url': 'https://www.youtube.com/user/numberphile/live',

5011

'only_matching': True,

5012

}, {

5013

'url': 'https://www.youtube.com/feed/trending',

5014

'only_matching': True,

5015

}, {

5016

'url': 'https://www.youtube.com/feed/library',

5017

'only_matching': True,

5018

}, {

5019

'url': 'https://www.youtube.com/feed/history',

5020

'only_matching': True,

5021

}, {

5022

'url': 'https://www.youtube.com/feed/subscriptions',

5023

'only_matching': True,

5024

}, {

5025

'url': 'https://www.youtube.com/feed/watch_later',

5026

'only_matching': True,

5027

}, {

5028

'note': 'Recommended - redirects to home page.',

5029

'url': 'https://www.youtube.com/feed/recommended',

5030

'only_matching': True,

5031

}, {

5032

'note': 'inline playlist with not always working continuations',

5033

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5034

'only_matching': True,

5035

}, {

5036

'url': 'https://www.youtube.com/course',

5037

'only_matching': True,

5038

}, {

5039

'url': 'https://www.youtube.com/zsecurity',

5040

'only_matching': True,

5041

}, {

5042

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5043

'only_matching': True,

5044

}, {

5045

'url': 'https://www.youtube.com/TheYoungTurks/live',

5046

'only_matching': True,

5047

}, {

5048

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5055

}, {

5056

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5057

'only_matching': True,

5058

}, {

5059

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5060

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5061

'only_matching': True

5062

}, {

5063

'note': '/browse/ should redirect to /channel/',

5064

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5065

'only_matching': True

5066

}, {

5067

'note': 'VLPL, should redirect to playlist?list=PL...',

5068

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5069

'info_dict': {

5070

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5071

'uploader': 'NoCopyrightSounds',

5072

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5073

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5074

'title': 'NCS Releases',

5075

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5076

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5077

'modified_date': r're:\d{8}',

5078

'view_count': int,

5079

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5080

'tags': [],

5081

'channel': 'NoCopyrightSounds',

5082

},

5083

'playlist_mincount': 166,

5084

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5085

}, {

5086

'note': 'Topic, should redirect to playlist?list=UU...',

5087

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5088

'info_dict': {

5089

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5090

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5091

'title': 'Uploads from Royalty Free Music - Topic',

5092

'uploader': 'Royalty Free Music - Topic',

5093

'tags': [],

5094

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5095

'channel': 'Royalty Free Music - Topic',

5096

'view_count': int,

5097

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5098

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5099

'modified_date': r're:\d{8}',

5100

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5101

'description': '',

5102

},

5103

'expected_warnings': [

5104

'The URL does not have a videos tab',

5105

r'[Uu]navailable videos (are|will be) hidden',

5106

],

5107

'playlist_mincount': 101,

5108

}, {

5109

'note': 'Topic without a UU playlist',

5110

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5111

'info_dict': {

5112

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5113

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5114

'tags': [],

5115

},

5116

'expected_warnings': [

5117

'the playlist redirect gave error',

5118

],

5119

'playlist_mincount': 9,

5120

}, {

5121

'note': 'Youtube music Album',

5122

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5123

'info_dict': {

5124

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5125

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5130

'modified_date': r're:\d{8}',

5131

},

5132

'playlist_count': 50,

5133

}, {

5134

'note': 'unlisted single video playlist',

5135

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5136

'info_dict': {

5137

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5138

'uploader': 'colethedj',

5139

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5140

'title': 'yt-dlp unlisted playlist test',

5141

'availability': 'unlisted',

5142

'tags': [],

5143

'modified_date': '20211208',

5144

'channel': 'colethedj',

5145

'view_count': int,

5146

'description': '',

5147

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5148

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5149

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5154

'url': 'https://www.youtube.com/feed/recommended',

5155

'info_dict': {

5156

'id': 'recommended',

5157

'title': 'recommended',

5158

'tags': [],

5159

},

5160

'playlist_mincount': 50,

5161

'params': {

5162

'skip_download': True,

5163

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5164

},

5165

}, {

5166

'note': 'API Fallback: /videos tab, sorted by oldest first',

5167

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5168

'info_dict': {

5169

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5170

'title': 'Cody\'sLab - Videos',

5171

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5172

'uploader': 'Cody\'sLab',

5173

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5174

'channel': 'Cody\'sLab',

5175

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5176

'tags': [],

5177

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5178

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5179

'channel_follower_count': int

5180

},

5181

'playlist_mincount': 650,

5182

'params': {

5183

'skip_download': True,

5184

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5185

},

5186

}, {

5187

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5188

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5189

'info_dict': {

5190

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5191

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5192

'title': 'Uploads from Royalty Free Music - Topic',

5193

'uploader': 'Royalty Free Music - Topic',

5194

'modified_date': r're:\d{8}',

5195

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5196

'description': '',

5197

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5198

'tags': [],

5199

'channel': 'Royalty Free Music - Topic',

5200

'view_count': int,

5201

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5202

},

5203

'expected_warnings': [

5204

'does not have a videos tab',

5205

r'[Uu]navailable videos (are|will be) hidden',

5206

],

5207

'playlist_mincount': 101,

5208

'params': {

5209

'skip_download': True,

5210

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5211

},

5212

}, {

5213

'note': 'non-standard redirect to regional channel',

5214

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5215

'only_matching': True

5216

}, {

5217

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5218

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5219

'info_dict': {

5220

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5221

'modified_date': '20220407',

5222

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5223

'tags': [],

5224

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5225

'uploader': 'pukkandan',

5226

'availability': 'unlisted',

5227

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5228

'channel': 'pukkandan',

5229

'description': 'Test for collaborative playlist',

5230

'title': 'yt-dlp test - collaborative playlist',

5231

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5232

},

5233

'playlist_mincount': 2

}]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    URLs that ``YoutubeIE`` accepts are always rejected here; for every
    other URL the decision is delegated to the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5239

5240

# Splits a URL into three named groups: 'pre' (the part matched by _VALID_URL),
# an optional '/<word>' tab segment ('tab') and everything after it ('post').
# The conditional (?(not_channel)|...) only tries to match a tab when the
# 'not_channel' group did not participate; that group is not defined in this
# pattern, so it has to come from _VALID_URL.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5241

5242

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab/playlist/channel style URL.

    The URL is first normalised (host forced to www.youtube.com, music
    specific ids rewritten, bare channel URLs pointed at ``/videos``), then
    the page data is fetched and dispatched to the tab, playlist or single
    video handling depending on what the response contains.

    Raises ExtractorError when the page cannot be recognised, when a
    requested tab does not exist, or when a ``/live`` tab did not redirect
    to a video.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    follow_channel_redirects = 'no-youtube-channel-redirect' not in compat_opts

    def split_url(target):
        # Named groups of _URL_RE, with unmatched groups mapped to ''
        groups = self._URL_RE.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    def video_result(vid):
        # Hand a single video over to the regular video extractor
        return self.url_result(
            f'https://www.youtube.com/watch?v={vid}', ie=YoutubeIE.ie_key(), video_id=vid)

    mobj = split_url(url)
    redirect_warning = None
    pre = mobj['pre']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab, post, is_channel = '', '', False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            album_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not album_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(album_url, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    original_tab_name = tab
    if is_channel and not tab and follow_channel_redirects:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = pre + tab + post
    mobj = split_url(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return video_result(video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_path = (
        'onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
        'commandMetadata', 'webCommandMetadata', 'url')
    redirect_url = traverse_obj(data, redirect_path, get_all=False)
    if redirect_url and follow_channel_redirects:
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        if follow_channel_redirects:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab was asked for explicitly, so there is nothing to fall back to
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # ``data`` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return video_result(video_id)

    raise ExtractorError('Unable to recognize tab page')

5371

5372

5373

class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and ``list=`` URLs and forward them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [
        {
            'note': 'issue #673',
            'url': 'PLBB231211A4F62143',
            'info_dict': {
                'title': '[OLD]Team Fortress 2 (Class-based LP)',
                'id': 'PLBB231211A4F62143',
                'uploader': 'Wickman',
                'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
                'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
                'view_count': int,
                'uploader_url': 'https://www.youtube.com/user/Wickydoo',
                'modified_date': r're:\d{8}',
                'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
                'channel': 'Wickman',
                'tags': [],
                'channel_url': 'https://www.youtube.com/user/Wickydoo',
            },
            'playlist_mincount': 29,
        },
        {
            'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
            'info_dict': {
                'title': 'YDL_safe_search',
                'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
            },
            'playlist_count': 2,
            'skip': 'This playlist is private',
        },
        {
            'note': 'embedded',
            'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'playlist_count': 4,
            'info_dict': {
                'title': 'JODA15',
                'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
                'uploader': 'milan',
                'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
                'description': '',
                'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
                'tags': [],
                'modified_date': '20140919',
                'view_count': int,
                'channel': 'milan',
                'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
                'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            },
            'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
        },
        {
            'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'playlist_mincount': 654,
            'info_dict': {
                'title': '2018 Chinese New Singles (11/6 updated)',
                'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
                'uploader': 'LBK',
                'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
                'description': 'md5:da521864744d60a198e3a88af4db0d9d',
                'channel': 'LBK',
                'view_count': int,
                'channel_url': 'https://www.youtube.com/c/愛低音的國王',
                'tags': [],
                'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
                'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
                'modified_date': r're:\d{8}',
            },
            'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
        },
        {
            'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
            'only_matching': True,
        },
        {
            # music album playlist
            'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
            'only_matching': True,
        },
    ]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by YoutubeTabIE or carrying a ``v`` parameter.

        Everything else is decided by the parent class.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): the import is deliberately kept function-local, presumably
        # so this method stays self-contained if its source is reused outside
        # this module - confirm before hoisting it.
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rebuild the input as a /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be determined from the original URL, before it is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; otherwise use just the id
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5482

5483

5484

class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be short links that carry a ``list=`` parameter into watch URLs."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [
        {
            'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
            'info_dict': {
                'id': 'yeWKywCrFtk',
                'ext': 'mp4',
                'title': 'Small Scale Baler and Braiding Rugs',
                'uploader': 'Backus-Page House Museum',
                'uploader_id': 'backuspagemuseum',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
                'upload_date': '20161008',
                'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
                'categories': ['Nonprofits & Activism'],
                'tags': list,
                'like_count': int,
                'age_limit': 0,
                'playable_in_embed': True,
                'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
                'channel': 'Backus-Page House Museum',
                'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
                'live_status': 'not_live',
                'view_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
                'availability': 'public',
                'duration': 59,
            },
            'params': {
                'noplaylist': True,
                'skip_download': True,
            },
        },
        {
            'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the equivalent /watch URL (video + list) and pass it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5531

5532

5533

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` URLs to the channel's /live page."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Redirect to ``/channel/<id>/live``, to be handled by YoutubeTabIE."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5546

5547

5548

class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into the user's /videos URL."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [
        {
            'url': 'ytuser:phihag',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Redirect to ``/user/<name>/videos``, to be handled by YoutubeTabIE."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5562

5563

5564

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Handle the ``:ytfav`` keyword by redirecting to the ``LL`` playlist."""

    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{'url': keyword, 'only_matching': True} for keyword in (':ytfav', ':ytfavorites')]

    def _real_extract(self, url):
        """Redirect to ``playlist?list=LL``, to be handled by YoutubeTabIE."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())

5581

5582

5583

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Handle the ``:ytnotif`` keyword: list the entries of the notification menu."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield the entries found in one notification menu response.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last
        ``continuationItemRenderer`` seen among the items, if any.
        """
        notification_list = traverse_obj(
            response,
            # Layout of the first page ...
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            # ... and of continuation pages
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for item in notification_list:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            continuation = item.get('continuationItemRenderer')
            if continuation:
                continuation_list[0] = continuation

    def _extract_notification_renderer(self, notification):
        """Convert one ``notificationRenderer`` into a ``url`` result dict.

        Notifications with a video id point at the watch page. Otherwise the
        notification is treated as a community post and needs both a channel
        id and a post id. Returns None when neither form can be resolved.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        url = f'https://www.youtube.com/watch?v={video_id}'
        channel_id = None
        if not video_id:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            # ``or ''`` keeps the regex search from ever being handed None
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str) or '',
                'post id', default=None)
            if not channel_id or not post_id:
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        # The channel name may be missing; re.escape(None) raises TypeError, so
        # fall back to an empty prefix and let the title stay best-effort.
        channel_pattern = re.escape(channel or '')
        title = self._search_regex(
            rf'{channel_pattern} [^:]+: (.+)', self._get_text(notification, 'shortMessage') or '',
            'video title', default=None)
        if title:
            title = title.replace('\xad', '')  # remove soft hyphens
        # The sent time is only turned into an upload date when the user opted in
        upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')
                       if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())
                       else None)
        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from every page of the notification menu.

        Pages are requested until a response carries no continuation item.
        """
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            # Visitor data is taken from the previous response (None on the first page)
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a playlist with id and title 'notifications'."""
        display_id = 'notifications'
        ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)

5670

5671

5672

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [
        {
            'url': 'ytsearch5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [
        {
            'url': 'ytsearchdate5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from ``/results`` and ``/search`` URLs."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'python',
                'title': 'python',
            },
        },
        {
            'url': 'https://www.youtube.com/results?search_query=%23cats',
            'playlist_mincount': 1,
            'info_dict': {
                'id': '#cats',
                'title': '#cats',
                'entries': [{
                    'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                    'title': '#cats',
                }],
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Run the search named by ``search_query``/``q``, passing ``sp`` through.

        The query string doubles as playlist id and title.
        """
        qs = parse_qs(url)
        # ``search_query`` wins over ``q`` when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

5740

5741

5742

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from music.youtube.com ``/search`` URLs."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music',
            'playlist_count': 16,
            'info_dict': {
                'id': 'royalty free music',
                'title': 'royalty free music',
            },
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - songs',
                'title': 'royalty free music - songs',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - community playlists',
                'title': 'royalty free music - community playlists',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
    ]

    # Section name (as written in the URL fragment) -> value of the ``sp`` parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run a music search, optionally restricted to one section.

        An explicit ``sp`` parameter takes precedence and is named after the
        matching ``_SECTIONS`` entry when there is one. Otherwise the URL
        fragment is looked up in ``_SECTIONS``. The playlist id/title is the
        query, followed by `` - <section>`` when a section applies.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Use the known section name if there is one, else the raw value
            section = params
            for name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = name
                    break
        else:
            # Text between the first and second '#' ('' when there is no fragment)
            fragment = (url.split('#') + [''])[1]
            section = compat_urllib_parse_unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

5793

5794

5795

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    Each subclass is expected to override ``_FEED_NAME``; it determines both
    the extractor name and the ``/feed/<name>`` URL that is requested.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        # Derived from the feed name, so subclasses do not set IE_NAME themselves
        return 'youtube:' + cls._FEED_NAME

    def _real_initialize(self):
        """Run the login check implemented on YoutubeBaseInfoExtractor."""
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Redirect to ``/feed/<_FEED_NAME>``, to be handled by YoutubeTabIE."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5813

5814

5815

class YoutubeWatchLaterIE(InfoExtractor):
    """Handle the ``:ytwatchlater`` keyword by redirecting to the ``WL`` playlist."""

    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Redirect to ``playlist?list=WL``, to be handled by YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5827

5828

5829

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed (bare home page URL or ``:ytrec``)."""

    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': matching_url, 'only_matching': True}
        for matching_url in (':ytrec', ':ytrecommended', 'https://youtube.com')
    ]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed (``:ytsubs`` keyword)."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': keyword, 'only_matching': True}
        for keyword in (':ytsubs', ':ytsubscriptions')
    ]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed (":ythis" / ":ythistory" keyword)."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeStoriesIE(InfoExtractor):
    """Turn a "ytstories:UC..." pseudo-URL into an "RLTD..." playlist URL for YoutubeTabIE."""
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    # The "id" group captures the part of the channel ID after the "UC" prefix
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        channel_suffix = self._match_id(url)
        # Playlist ID is "RLTD" followed by the channel ID without its "UC" prefix
        playlist_id = f'RLTD{channel_suffix}'
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        return self.url_result(playlist_url, ie=YoutubeTabIE, video_id=playlist_id)

5883

5884

5885

class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catch watch/attribution URLs that lack a video ID and explain why.

    _VALID_URL matches a YouTube "watch?" URL that ends right after a single
    non-video parameter (or after the "?"), or an "attribution_link?a=..."
    URL with nothing after its first parameter. Extraction always fails
    with an expected ExtractorError that tells the user to quote the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose-mode pattern: whitespace outside character classes is ignored,
    # so the layout below is purely cosmetic. The trailing "$" ensures that
    # nothing (in particular no "&v=...") follows the matched parameter.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError; there is nothing to extract."""
        # The example commands name yt-dlp (this project's executable)
        # rather than youtube-dl, so the advice can be followed verbatim.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(InfoExtractor):
    """Placeholder for youtube.com/clip/ URLs: warn, then pass the URL to the 'Generic' extractor."""
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        unsupported_notice = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(unsupported_notice)
        # The clip URL itself is forwarded unchanged
        return self.url_result(url, 'Generic')

5942

5943

5944

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose "v" value is only 1-10 characters long and report them as truncated."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Only matches when the short ID is the very end of the URL
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        # expected=True, as in the sibling truncated-URL extractor
        # (presumably marks this as a user error rather than an extractor bug)
        raise ExtractorError(message, expected=True)