jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import hashlib
	6	import itertools
	7	import json
	8	import math
	9	import os.path
	10	import random
	11	import re
	12	import sys
	13	import threading
	14	import time
	15	import traceback
	16
	17	from .common import InfoExtractor, SearchInfoExtractor
	18	from ..compat import functools # isort: split
	19	from ..compat import (
	20	compat_chr,
	21	compat_HTTPError,
	22	compat_parse_qs,
	23	compat_str,
	24	compat_urllib_parse_unquote_plus,
	25	compat_urllib_parse_urlencode,
	26	compat_urllib_parse_urlparse,
	27	compat_urlparse,
	28	)
	29	from ..jsinterp import JSInterpreter
	30	from ..utils import (
	31	NO_DEFAULT,
	32	ExtractorError,
	33	bug_reports_message,
	34	classproperty,
	35	clean_html,
	36	datetime_from_str,
	37	dict_get,
	38	error_to_compat_str,
	39	float_or_none,
	40	format_field,
	41	get_first,
	42	int_or_none,
	43	is_html,
	44	join_nonempty,
	45	js_to_json,
	46	mimetype2ext,
	47	network_exceptions,
	48	orderedSet,
	49	parse_codecs,
	50	parse_count,
	51	parse_duration,
	52	parse_iso8601,
	53	parse_qs,
	54	qualities,
	55	remove_end,
	56	remove_start,
	57	smuggle_url,
	58	str_or_none,
	59	str_to_int,
	60	strftime_or_none,
	61	traverse_obj,
	62	try_get,
	63	unescapeHTML,
	64	unified_strdate,
	65	unified_timestamp,
	66	unsmuggle_url,
	67	update_url_query,
	68	url_or_none,
	69	urljoin,
	70	variadic,
	71	)
	72
	73	# any clients starting with _ cannot be explicity requested by the user
	74	INNERTUBE_CLIENTS = {
	75	'web': {
	76	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	77	'INNERTUBE_CONTEXT': {
	78	'client': {
	79	'clientName': 'WEB',
	80	'clientVersion': '2.20211221.00.00',
	81	}
	82	},
	83	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	84	},
	85	'web_embedded': {
	86	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	87	'INNERTUBE_CONTEXT': {
	88	'client': {
	89	'clientName': 'WEB_EMBEDDED_PLAYER',
	90	'clientVersion': '1.20211215.00.01',
	91	},
	92	},
	93	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	94	},
	95	'web_music': {
	96	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	97	'INNERTUBE_HOST': 'music.youtube.com',
	98	'INNERTUBE_CONTEXT': {
	99	'client': {
	100	'clientName': 'WEB_REMIX',
	101	'clientVersion': '1.20211213.00.00',
	102	}
	103	},
	104	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	105	},
	106	'web_creator': {
	107	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	108	'INNERTUBE_CONTEXT': {
	109	'client': {
	110	'clientName': 'WEB_CREATOR',
	111	'clientVersion': '1.20211220.02.00',
	112	}
	113	},
	114	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	115	},
	116	'android': {
	117	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	118	'INNERTUBE_CONTEXT': {
	119	'client': {
	120	'clientName': 'ANDROID',
	121	'clientVersion': '16.49',
	122	}
	123	},
	124	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	125	'REQUIRE_JS_PLAYER': False
	126	},
	127	'android_embedded': {
	128	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	129	'INNERTUBE_CONTEXT': {
	130	'client': {
	131	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	132	'clientVersion': '16.49',
	133	},
	134	},
	135	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	136	'REQUIRE_JS_PLAYER': False
	137	},
	138	'android_music': {
	139	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	140	'INNERTUBE_CONTEXT': {
	141	'client': {
	142	'clientName': 'ANDROID_MUSIC',
	143	'clientVersion': '4.57',
	144	}
	145	},
	146	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	147	'REQUIRE_JS_PLAYER': False
	148	},
	149	'android_creator': {
	150	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	151	'INNERTUBE_CONTEXT': {
	152	'client': {
	153	'clientName': 'ANDROID_CREATOR',
	154	'clientVersion': '21.47',
	155	},
	156	},
	157	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	158	'REQUIRE_JS_PLAYER': False
	159	},
	160	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	161	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	162	'ios': {
	163	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	164	'INNERTUBE_CONTEXT': {
	165	'client': {
	166	'clientName': 'IOS',
	167	'clientVersion': '16.46',
	168	'deviceModel': 'iPhone14,3',
	169	}
	170	},
	171	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	172	'REQUIRE_JS_PLAYER': False
	173	},
	174	'ios_embedded': {
	175	'INNERTUBE_CONTEXT': {
	176	'client': {
	177	'clientName': 'IOS_MESSAGES_EXTENSION',
	178	'clientVersion': '16.46',
	179	'deviceModel': 'iPhone14,3',
	180	},
	181	},
	182	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	183	'REQUIRE_JS_PLAYER': False
	184	},
	185	'ios_music': {
	186	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	187	'INNERTUBE_CONTEXT': {
	188	'client': {
	189	'clientName': 'IOS_MUSIC',
	190	'clientVersion': '4.57',
	191	},
	192	},
	193	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	194	'REQUIRE_JS_PLAYER': False
	195	},
	196	'ios_creator': {
	197	'INNERTUBE_CONTEXT': {
	198	'client': {
	199	'clientName': 'IOS_CREATOR',
	200	'clientVersion': '21.47',
	201	},
	202	},
	203	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	204	'REQUIRE_JS_PLAYER': False
	205	},
	206	# mweb has 'ultralow' formats
	207	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	208	'mweb': {
	209	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	210	'INNERTUBE_CONTEXT': {
	211	'client': {
	212	'clientName': 'MWEB',
	213	'clientVersion': '2.20211221.01.00',
	214	}
	215	},
	216	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	217	},
	218	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	219	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	220	'tv_embedded': {
	221	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	222	'INNERTUBE_CONTEXT': {
	223	'client': {
	224	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	225	'clientVersion': '2.0',
	226	},
	227	},
	228	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	229	},
	230	}
	231
	232
	233	def _split_innertube_client(client_name):
	234	variant, *base = client_name.rsplit('.', 1)
	235	if base:
	236	return variant, base[0], variant
	237	base, *variant = client_name.split('_', 1)
	238	return client_name, base, variant[0] if variant else None
	239
	240
	241	def build_innertube_clients():
	242	THIRD_PARTY = {
	243	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	244	}
	245	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	246	priority = qualities(BASE_CLIENTS[::-1])
	247
	248	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	249	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	250	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	251	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	252	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	253
	254	_, base_client, variant = _split_innertube_client(client)
	255	ytcfg['priority'] = 10 * priority(base_client)
	256
	257	if not variant:
	258	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	259	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	260	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	261	embedscreen['priority'] -= 3
	262	elif variant == 'embedded':
	263	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	264	ytcfg['priority'] -= 2
	265	else:
	266	ytcfg['priority'] -= 3
	267
	268
	269	build_innertube_clients()
	270
	271
	272	class YoutubeBaseInfoExtractor(InfoExtractor):
	273	"""Provide base functions for Youtube extractors"""
	274
	275	_RESERVED_NAMES = (
	276	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	277	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	278	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	279	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	280
	281	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	282
	283	# _NETRC_MACHINE = 'youtube'
	284
	285	# If True it will raise an error if no login info is provided
	286	_LOGIN_REQUIRED = False
	287
	288	_INVIDIOUS_SITES = (
	289	# invidious-redirect websites
	290	r'(?:www\.)?redirect\.invidious\.io',
	291	r'(?:(?:www\|dev)\.)?invidio\.us',
	292	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	293	r'(?:www\.)?invidious\.pussthecat\.org',
	294	r'(?:www\.)?invidious\.zee\.li',
	295	r'(?:www\.)?invidious\.ethibox\.fr',
	296	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	297	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	298	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	299	# youtube-dl invidious instances list
	300	r'(?:(?:www\|no)\.)?invidiou\.sh',
	301	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	302	r'(?:www\.)?invidious\.kabi\.tk',
	303	r'(?:www\.)?invidious\.mastodon\.host',
	304	r'(?:www\.)?invidious\.zapashcanon\.fr',
	305	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	306	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	307	r'(?:www\.)?invidious\.himiko\.cloud',
	308	r'(?:www\.)?invidious\.reallyancient\.tech',
	309	r'(?:www\.)?invidious\.tube',
	310	r'(?:www\.)?invidiou\.site',
	311	r'(?:www\.)?invidious\.site',
	312	r'(?:www\.)?invidious\.xyz',
	313	r'(?:www\.)?invidious\.nixnet\.xyz',
	314	r'(?:www\.)?invidious\.048596\.xyz',
	315	r'(?:www\.)?invidious\.drycat\.fr',
	316	r'(?:www\.)?inv\.skyn3t\.in',
	317	r'(?:www\.)?tube\.poal\.co',
	318	r'(?:www\.)?tube\.connect\.cafe',
	319	r'(?:www\.)?vid\.wxzm\.sx',
	320	r'(?:www\.)?vid\.mint\.lgbt',
	321	r'(?:www\.)?vid\.puffyan\.us',
	322	r'(?:www\.)?yewtu\.be',
	323	r'(?:www\.)?yt\.elukerio\.org',
	324	r'(?:www\.)?yt\.lelux\.fi',
	325	r'(?:www\.)?invidious\.ggc-project\.de',
	326	r'(?:www\.)?yt\.maisputain\.ovh',
	327	r'(?:www\.)?ytprivate\.com',
	328	r'(?:www\.)?invidious\.13ad\.de',
	329	r'(?:www\.)?invidious\.toot\.koeln',
	330	r'(?:www\.)?invidious\.fdn\.fr',
	331	r'(?:www\.)?watch\.nettohikari\.com',
	332	r'(?:www\.)?invidious\.namazso\.eu',
	333	r'(?:www\.)?invidious\.silkky\.cloud',
	334	r'(?:www\.)?invidious\.exonip\.de',
	335	r'(?:www\.)?invidious\.riverside\.rocks',
	336	r'(?:www\.)?invidious\.blamefran\.net',
	337	r'(?:www\.)?invidious\.moomoo\.de',
	338	r'(?:www\.)?ytb\.trom\.tf',
	339	r'(?:www\.)?yt\.cyberhost\.uk',
	340	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	341	r'(?:www\.)?qklhadlycap4cnod\.onion',
	342	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	343	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	344	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	345	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	346	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	347	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	348	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	349	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	350	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	351	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	352	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	353	r'(?:www\.)?piped\.kavin\.rocks',
	354	r'(?:www\.)?piped\.silkky\.cloud',
	355	r'(?:www\.)?piped\.tokhmi\.xyz',
	356	r'(?:www\.)?piped\.moomoo\.me',
	357	r'(?:www\.)?il\.ax',
	358	r'(?:www\.)?piped\.syncpundit\.com',
	359	r'(?:www\.)?piped\.mha\.fi',
	360	r'(?:www\.)?piped\.mint\.lgbt',
	361	r'(?:www\.)?piped\.privacy\.com\.de',
	362	)
	363
	364	def _initialize_consent(self):
	365	cookies = self._get_cookies('https://www.youtube.com/')
	366	if cookies.get('__Secure-3PSID'):
	367	return
	368	consent_id = None
	369	consent = cookies.get('CONSENT')
	370	if consent:
	371	if 'YES' in consent.value:
	372	return
	373	consent_id = self._search_regex(
	374	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	375	if not consent_id:
	376	consent_id = random.randint(100, 999)
	377	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	378
	379	def _initialize_pref(self):
	380	cookies = self._get_cookies('https://www.youtube.com/')
	381	pref_cookie = cookies.get('PREF')
	382	pref = {}
	383	if pref_cookie:
	384	try:
	385	pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
	386	except ValueError:
	387	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	388	pref.update({'hl': 'en', 'tz': 'UTC'})
	389	self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
	390
	391	def _real_initialize(self):
	392	self._initialize_pref()
	393	self._initialize_consent()
	394	self._check_login_required()
	395
	396	def _check_login_required(self):
	397	if self._LOGIN_REQUIRED and not self._cookies_passed:
	398	self.raise_login_required('Login details are needed to download this content', method='cookies')
	399
	400	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	401	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	402
	403	def _get_default_ytcfg(self, client='web'):
	404	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	405
	406	def _get_innertube_host(self, client='web'):
	407	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	408
	409	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	410	# try_get but with fallback to default ytcfg client values when present
	411	_func = lambda y: try_get(y, getter, expected_type)
	412	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	413
	414	def _extract_client_name(self, ytcfg, default_client='web'):
	415	return self._ytcfg_get_safe(
	416	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	417	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
	418
	419	def _extract_client_version(self, ytcfg, default_client='web'):
	420	return self._ytcfg_get_safe(
	421	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	422	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
	423
	424	def _extract_api_key(self, ytcfg=None, default_client='web'):
	425	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
	426
	427	def _extract_context(self, ytcfg=None, default_client='web'):
	428	context = get_first(
	429	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	430	# Enforce language and tz for extraction
	431	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	432	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	433	return context
	434
	435	_SAPISID = None
	436
	437	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	438	time_now = round(time.time())
	439	if self._SAPISID is None:
	440	yt_cookies = self._get_cookies('https://www.youtube.com')
	441	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	442	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	443	sapisid_cookie = dict_get(
	444	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	445	if sapisid_cookie and sapisid_cookie.value:
	446	self._SAPISID = sapisid_cookie.value
	447	self.write_debug('Extracted SAPISID cookie')
	448	# SAPISID cookie is required if not already present
	449	if not yt_cookies.get('SAPISID'):
	450	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	451	self._set_cookie(
	452	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	453	else:
	454	self._SAPISID = False
	455	if not self._SAPISID:
	456	return None
	457	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	458	sapisidhash = hashlib.sha1(
	459	f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
	460	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	461
	462	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	463	note='Downloading API JSON', errnote='Unable to download API page',
	464	context=None, api_key=None, api_hostname=None, default_client='web'):
	465
	466	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	467	data.update(query)
	468	real_headers = self.generate_api_headers(default_client=default_client)
	469	real_headers.update({'content-type': 'application/json'})
	470	if headers:
	471	real_headers.update(headers)
	472	return self._download_json(
	473	f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',
	474	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	475	data=json.dumps(data).encode('utf8'), headers=real_headers,
	476	query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
	477
	478	def extract_yt_initial_data(self, item_id, webpage):
	479	return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=True)
	480
	481	def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
	482	return self._parse_json(self._search_regex(
	483	(fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',
	484	regex), webpage, name, default='{}'), video_id, fatal=False, lenient=True)
	485
	486	@staticmethod
	487	def _extract_session_index(*data):
	488	"""
	489	Index of current account in account list.
	490	See: https://github.com/yt-dlp/yt-dlp/pull/519
	491	"""
	492	for ytcfg in data:
	493	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	494	if session_index is not None:
	495	return session_index
	496
	497	# Deprecated?
	498	def _extract_identity_token(self, ytcfg=None, webpage=None):
	499	if ytcfg:
	500	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)

1

import base64

import calendar

import copy

import datetime

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

from .common import InfoExtractor, SearchInfoExtractor

18

from ..compat import functools # isort: split

19

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

25

compat_urllib_parse_urlencode,

26

compat_urllib_parse_urlparse,

27

compat_urlparse,

28

)

29

from ..jsinterp import JSInterpreter

30

from ..utils import (

NO_DEFAULT,

ExtractorError,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicity requested by the user

74

INNERTUBE_CLIENTS = {

75

'web': {

76

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

77

'INNERTUBE_CONTEXT': {

78

'client': {

79

'clientName': 'WEB',

80

'clientVersion': '2.20211221.00.00',

81

}

82

},

83

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

84

},

85

'web_embedded': {

86

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

87

'INNERTUBE_CONTEXT': {

88

'client': {

89

'clientName': 'WEB_EMBEDDED_PLAYER',

90

'clientVersion': '1.20211215.00.01',

91

},

92

},

93

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

94

},

95

'web_music': {

96

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

97

'INNERTUBE_HOST': 'music.youtube.com',

98

'INNERTUBE_CONTEXT': {

99

'client': {

100

'clientName': 'WEB_REMIX',

101

'clientVersion': '1.20211213.00.00',

102

}

103

},

104

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

105

},

106

'web_creator': {

107

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

108

'INNERTUBE_CONTEXT': {

109

'client': {

110

'clientName': 'WEB_CREATOR',

111

'clientVersion': '1.20211220.02.00',

112

}

113

},

114

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

115

},

116

'android': {

117

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

118

'INNERTUBE_CONTEXT': {

119

'client': {

120

'clientName': 'ANDROID',

121

'clientVersion': '16.49',

122

}

123

},

124

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

125

'REQUIRE_JS_PLAYER': False

126

},

127

'android_embedded': {

128

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

129

'INNERTUBE_CONTEXT': {

130

'client': {

131

'clientName': 'ANDROID_EMBEDDED_PLAYER',

132

'clientVersion': '16.49',

133

},

134

},

135

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

136

'REQUIRE_JS_PLAYER': False

137

},

138

'android_music': {

139

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

140

'INNERTUBE_CONTEXT': {

141

'client': {

142

'clientName': 'ANDROID_MUSIC',

143

'clientVersion': '4.57',

144

}

145

},

146

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

147

'REQUIRE_JS_PLAYER': False

148

},

149

'android_creator': {

150

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

151

'INNERTUBE_CONTEXT': {

152

'client': {

153

'clientName': 'ANDROID_CREATOR',

154

'clientVersion': '21.47',

155

},

156

},

157

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

158

'REQUIRE_JS_PLAYER': False

159

},

160

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

161

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

162

'ios': {

163

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

164

'INNERTUBE_CONTEXT': {

165

'client': {

166

'clientName': 'IOS',

167

'clientVersion': '16.46',

168

'deviceModel': 'iPhone14,3',

169

}

170

},

171

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

172

'REQUIRE_JS_PLAYER': False

173

},

174

'ios_embedded': {

175

'INNERTUBE_CONTEXT': {

176

'client': {

177

'clientName': 'IOS_MESSAGES_EXTENSION',

178

'clientVersion': '16.46',

179

'deviceModel': 'iPhone14,3',

180

},

181

},

182

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

183

'REQUIRE_JS_PLAYER': False

184

},

185

'ios_music': {

186

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

187

'INNERTUBE_CONTEXT': {

188

'client': {

189

'clientName': 'IOS_MUSIC',

190

'clientVersion': '4.57',

191

},

192

},

193

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

194

'REQUIRE_JS_PLAYER': False

195

},

196

'ios_creator': {

197

'INNERTUBE_CONTEXT': {

198

'client': {

199

'clientName': 'IOS_CREATOR',

200

'clientVersion': '21.47',

201

},

202

},

203

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

204

'REQUIRE_JS_PLAYER': False

205

},

206

# mweb has 'ultralow' formats

207

# See: https://github.com/yt-dlp/yt-dlp/pull/557

208

'mweb': {

209

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

210

'INNERTUBE_CONTEXT': {

211

'client': {

212

'clientName': 'MWEB',

213

'clientVersion': '2.20211221.01.00',

214

}

215

},

216

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

217

},

218

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

219

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

220

'tv_embedded': {

221

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

222

'INNERTUBE_CONTEXT': {

223

'client': {

224

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

225

'clientVersion': '2.0',

226

},

227

},

228

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

234

variant, *base = client_name.rsplit('.', 1)

235

if base:

236

return variant, base[0], variant

237

base, *variant = client_name.split('_', 1)

238

return client_name, base, variant[0] if variant else None

239

240

241

def build_innertube_clients():

242

THIRD_PARTY = {

243

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

244

}

245

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

246

priority = qualities(BASE_CLIENTS[::-1])

247

248

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

249

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

250

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

251

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

252

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

253

254

_, base_client, variant = _split_innertube_client(client)

255

ytcfg['priority'] = 10 * priority(base_client)

256

257

if not variant:

258

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

259

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

260

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

261

embedscreen['priority'] -= 3

262

elif variant == 'embedded':

263

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

264

ytcfg['priority'] -= 2

265

else:

266

ytcfg['priority'] -= 3

267

268

269

build_innertube_clients()

270

271

272

class YoutubeBaseInfoExtractor(InfoExtractor):

273

"""Provide base functions for Youtube extractors"""

274

275

_RESERVED_NAMES = (

276

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

282

283

# _NETRC_MACHINE = 'youtube'

284

285

# If True it will raise an error if no login info is provided

286

_LOGIN_REQUIRED = False

287

288

_INVIDIOUS_SITES = (

289

# invidious-redirect websites

290

r'(?:www\.)?redirect\.invidious\.io',

291

r'(?:(?:www|dev)\.)?invidio\.us',

292

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

293

r'(?:www\.)?invidious\.pussthecat\.org',

294

r'(?:www\.)?invidious\.zee\.li',

295

r'(?:www\.)?invidious\.ethibox\.fr',

296

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

297

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

298

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

299

# youtube-dl invidious instances list

300

r'(?:(?:www|no)\.)?invidiou\.sh',

301

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

302

r'(?:www\.)?invidious\.kabi\.tk',

303

r'(?:www\.)?invidious\.mastodon\.host',

304

r'(?:www\.)?invidious\.zapashcanon\.fr',

305

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

306

r'(?:www\.)?invidious\.tinfoil-hat\.net',

307

r'(?:www\.)?invidious\.himiko\.cloud',

308

r'(?:www\.)?invidious\.reallyancient\.tech',

309

r'(?:www\.)?invidious\.tube',

310

r'(?:www\.)?invidiou\.site',

311

r'(?:www\.)?invidious\.site',

312

r'(?:www\.)?invidious\.xyz',

313

r'(?:www\.)?invidious\.nixnet\.xyz',

314

r'(?:www\.)?invidious\.048596\.xyz',

315

r'(?:www\.)?invidious\.drycat\.fr',

316

r'(?:www\.)?inv\.skyn3t\.in',

317

r'(?:www\.)?tube\.poal\.co',

318

r'(?:www\.)?tube\.connect\.cafe',

319

r'(?:www\.)?vid\.wxzm\.sx',

320

r'(?:www\.)?vid\.mint\.lgbt',

321

r'(?:www\.)?vid\.puffyan\.us',

322

r'(?:www\.)?yewtu\.be',

323

r'(?:www\.)?yt\.elukerio\.org',

324

r'(?:www\.)?yt\.lelux\.fi',

325

r'(?:www\.)?invidious\.ggc-project\.de',

326

r'(?:www\.)?yt\.maisputain\.ovh',

327

r'(?:www\.)?ytprivate\.com',

328

r'(?:www\.)?invidious\.13ad\.de',

329

r'(?:www\.)?invidious\.toot\.koeln',

330

r'(?:www\.)?invidious\.fdn\.fr',

331

r'(?:www\.)?watch\.nettohikari\.com',

332

r'(?:www\.)?invidious\.namazso\.eu',

333

r'(?:www\.)?invidious\.silkky\.cloud',

334

r'(?:www\.)?invidious\.exonip\.de',

335

r'(?:www\.)?invidious\.riverside\.rocks',

336

r'(?:www\.)?invidious\.blamefran\.net',

337

r'(?:www\.)?invidious\.moomoo\.de',

338

r'(?:www\.)?ytb\.trom\.tf',

339

r'(?:www\.)?yt\.cyberhost\.uk',

340

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

341

r'(?:www\.)?qklhadlycap4cnod\.onion',

342

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

343

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

344

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

345

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

346

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

347

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

348

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

349

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

350

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

351

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

352

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

353

r'(?:www\.)?piped\.kavin\.rocks',

354

r'(?:www\.)?piped\.silkky\.cloud',

355

r'(?:www\.)?piped\.tokhmi\.xyz',

356

r'(?:www\.)?piped\.moomoo\.me',

357

r'(?:www\.)?il\.ax',

358

r'(?:www\.)?piped\.syncpundit\.com',

359

r'(?:www\.)?piped\.mha\.fi',

360

r'(?:www\.)?piped\.mint\.lgbt',

361

r'(?:www\.)?piped\.privacy\.com\.de',

362

)

363

364

def _initialize_consent(self):

365

cookies = self._get_cookies('https://www.youtube.com/')

366

if cookies.get('__Secure-3PSID'):

367

return

368

consent_id = None

369

consent = cookies.get('CONSENT')

370

if consent:

371

if 'YES' in consent.value:

372

return

373

consent_id = self._search_regex(

374

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

375

if not consent_id:

376

consent_id = random.randint(100, 999)

377

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

378

379

def _initialize_pref(self):

380

cookies = self._get_cookies('https://www.youtube.com/')

381

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))

386

except ValueError:

387

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

388

pref.update({'hl': 'en', 'tz': 'UTC'})

389

self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))

390

391

def _real_initialize(self):

392

self._initialize_pref()

393

self._initialize_consent()

394

self._check_login_required()

395

396

def _check_login_required(self):

397

if self._LOGIN_REQUIRED and not self._cookies_passed:

398

self.raise_login_required('Login details are needed to download this content', method='cookies')

399

400

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

401

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

402

403

def _get_default_ytcfg(self, client='web'):

404

return copy.deepcopy(INNERTUBE_CLIENTS[client])

405

406

def _get_innertube_host(self, client='web'):

407

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

408

409

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

410

# try_get but with fallback to default ytcfg client values when present

411

_func = lambda y: try_get(y, getter, expected_type)

412

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

413

414

def _extract_client_name(self, ytcfg, default_client='web'):

415

return self._ytcfg_get_safe(

416

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

417

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

418

419

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from ``ytcfg``, or from the defaults of ``default_client``.

    Looks at ``INNERTUBE_CLIENT_VERSION`` first, then at the ``clientVersion``
    of the INNERTUBE_CONTEXT client section.
    """
    version_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, compat_str, default_client)

423

424

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return ``INNERTUBE_API_KEY`` from ``ytcfg``, or from the defaults of ``default_client``."""
    def key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, key_getter, compat_str, default_client)

426

427

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the ``INNERTUBE_CONTEXT`` dict to send with API requests.

    get_first() is given ``ytcfg`` first and the built-in defaults of
    ``default_client`` second. When the resulting context has a dict
    ``client`` section, that section is updated in place.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
    return context

# SAPISID value cached by _generate_sapisidhash_header():
# None  -> the cookies have not been inspected yet
# False -> the cookies were inspected and no usable cookie was found
# otherwise the cookie value itself
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

438

time_now = round(time.time())

439

if self._SAPISID is None:

440

yt_cookies = self._get_cookies('https://www.youtube.com')

441

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

442

# See: https://github.com/yt-dlp/yt-dlp/issues/393

443

sapisid_cookie = dict_get(

444

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

445

if sapisid_cookie and sapisid_cookie.value:

446

self._SAPISID = sapisid_cookie.value

447

self.write_debug('Extracted SAPISID cookie')

448

# SAPISID cookie is required if not already present

449

if not yt_cookies.get('SAPISID'):

450

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

451

self._set_cookie(

452

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

453

else:

454

self._SAPISID = False

455

if not self._SAPISID:

456

return None

457

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

458

sapisidhash = hashlib.sha1(

459

f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()

460

return f'SAPISIDHASH {time_now}_{sapisidhash}'

461

462

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST ``query`` as JSON to the ``/youtubei/v1/{ep}`` endpoint.

    The request body is ``query`` plus a ``context`` entry (the given
    ``context`` or one extracted for ``default_client``). Headers come from
    generate_api_headers() and are overridden by ``headers`` when given.
    When ``api_key`` is falsy the key is looked up with ``_extract_api_key()``
    called with its default arguments.

    @returns whatever ``_download_json`` returns for the request
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    host = api_hostname or self._get_innertube_host(default_client)
    return self._download_json(
        f'https://{host}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})

477

478

def extract_yt_initial_data(self, item_id, webpage):
    """Return the JSON found after the ``ytInitialData`` assignment in ``webpage``.

    The search is performed with ``fatal=True``.
    """
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=True)

480

481

def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search ``webpage`` for the variable matched by ``regex`` and parse it as JSON.

    ``regex`` followed by ``self._YT_INITIAL_BOUNDARY_RE`` (defined outside
    this section) is tried first, then ``regex`` on its own. When nothing
    matches, ``'{}'`` is parsed instead. Parsing is non-fatal and lenient.
    """
    patterns = (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}', regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False, lenient=True)

485

486

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    @params any number of ytcfg dicts, checked in order
    @returns the first ``SESSION_INDEX`` for which int_or_none() gives a
             non-None value, else None
    """
    indices = (int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX'])) for cfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from ``ytcfg`` or, failing that, from ``webpage``.

    ``ytcfg['ID_TOKEN']`` wins when it is a non-empty string. Otherwise the
    webpage (if given) is searched for an ``ID_TOKEN`` entry.

    @returns the token, or None if it is not found
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

507

508

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the syncid from the first argument that provides one, else None
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        sync_ids = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(sync_ids) >= 2 and sync_ids[1]:
            return sync_ids[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)

536

537

@functools.cached_property
def is_authenticated(self):
    """Whether a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

540

541

def extract_ytcfg(self, video_id, webpage):
    """Extract the dict passed to ``ytcfg.set({...});`` in ``webpage``.

    @returns the parsed config, or {} when ``webpage`` is empty, no
             ``ytcfg.set(...)`` call is found, or the JSON cannot be parsed
    """
    if not webpage:
        return {}
    # Match the literal call `ytcfg.set( {...} );` - the parentheses of the
    # call are escaped so that they are matched literally
    ytcfg_json = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(ytcfg_json, video_id, fatal=False) or {}

548

549

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an API request.

    Explicit keyword arguments take precedence over values extracted from
    ``ytcfg``. ``X-Goog-AuthUser`` is only set when an account syncid was
    passed or a session index is known. ``Authorization`` and ``X-Origin``
    are only set when a SAPISIDHASH header can be generated.

    @returns dict of headers; entries whose value is None are left out
    """
    host = api_hostname if api_hostname else self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # An account syncid without a known session index defaults to index 0
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    return {name: value for name, value in headers.items() if value is not None}

573

574

def _download_ytcfg(self, client, video_id):
    """Download the page for ``client`` and return the ytcfg extracted from it.

    Only the ``web``, ``web_music`` and ``web_embedded`` clients have a page
    to download. The download is non-fatal.

    @returns the ytcfg dict, or {} for any other client or when nothing
             could be extracted
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    url = config_pages.get(client)
    if not url:
        return {}
    client_label = client.replace("_", " ").strip()
    webpage = self._download_webpage(
        url, video_id, fatal=False, note=f'Downloading {client_label} client config')
    return self.extract_ytcfg(video_id, webpage) or {}

585

586

@staticmethod

587

def _build_api_continuation_query(continuation, ctp=None):

588

query = {

589

'continuation': continuation

590

}

591

# TODO: Inconsistency with clickTrackingParams.

592

# Currently we have a fixed ctp contained within context (from ytcfg)

593

# and a ctp in root query for continuation.

594

if ctp:

595

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from ``nextContinuationData``/``reloadContinuationData``.

    @returns the query dict, or None when ``renderer`` has no such data or
             the data carries no continuation token
    """
    data_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    next_continuation = try_get(renderer, data_getters, dict)
    token = next_continuation.get('continuation') if next_continuation else None
    if not token:
        return
    return cls._build_api_continuation_query(token, next_continuation.get('clickTrackingParams'))

610

611

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    @returns the query dict, or None when ``continuation_ep`` is not a dict
             or has no ``continuationCommand`` token
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

620

621

@classmethod
def _extract_continuation(cls, renderer):
    """Find the continuation query for ``renderer``.

    The renderer's own next/reload continuation data is preferred. Failing
    that, the dict entries of its ``contents`` and ``items`` lists are
    searched for a ``continuationItemRenderer``.

    @returns the first continuation query found, else None
    """
    next_continuation = cls._extract_next_continuation_data(renderer)
    if next_continuation:
        return next_continuation

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []
        if isinstance(entry, dict)]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        endpoint = try_get(entry, endpoint_getters, dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query

@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` for each alert in ``data['alerts']``.

    Entries that are not dicts, that have no ``type``, or whose ``text``
    yields no message are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Apply the same type guard as for the outer entries: a non-dict
            # value has no `.get` and would raise AttributeError
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

654

655

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

656

errors = []

657

warnings = []

658

for alert_type, alert_message in alerts:

659

if alert_type.lower() == 'error' and fatal:

660

errors.append([alert_type, alert_message])

661

else:

662

warnings.append([alert_type, alert_message])

663

664

for alert_type, alert_message in (warnings + errors[:-1]):

665

self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

666

if errors:

667

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

668

669

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of ``data`` and report them.

    Extra positional and keyword arguments are forwarded to _report_alerts().
    """
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

671

672

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased ``metadataBadgeRenderer`` labels in ``renderer['badges']``."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found in ``data``.

    Each path in ``path_list`` is tried in order (``data`` itself when no
    path is given). For every object found, ``simpleText`` is preferred,
    otherwise the ``text`` of its ``runs`` (or of the object itself, if it
    is a list) are joined.

    @param max_runs: when truthy, the maximum number of runs to join
    @returns the text, or None if nothing is found
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Wrap the result unless the path has a `...` or list/tuple key
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]
        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = max_runs or len(runs)
            runs = runs[:min(len(runs), limit)]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined

def _get_count(self, data, *path_list):
    """Return the count parsed from the text found at ``path_list`` in ``data``.

    parse_count() is tried first. If it gives None, the leading run of
    digits and commas (after removing all whitespace) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)
    return str_to_int(leading_digits)

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width'; entries without
             a valid URL are left out
    """
    results = []
    for path in path_list or [()]:
        for entry in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
            entry_url = url_or_none(entry.get('url'))
            if entry_url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in entry_url:
                    entry_url = entry_url.partition('?')[0]
                results.append({
                    'url': entry_url,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return results

@staticmethod

def extract_relative_time(relative_time_text):

736

"""

737

Extracts a relative time from string and converts to dt object

738

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

"""

if mobj:

start = mobj.group('start')

743

if start:

744

return datetime_from_str(start)

745

try:

746

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _extract_time_text(self, renderer, *path_list):
    """Extract a time text from ``renderer`` and convert it to a timestamp.

    A relative time ('6 days ago') is tried first, then unified_timestamp()
    on the whole text, and finally on a date-like part found by regex. A
    warning is reported once when a non-empty text cannot be converted.

    @returns (timestamp, time_text)
    """
    text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(text)
    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_patterns = (
                r'([a-z]+\s*\d{1,2},?\s*20\d{2})',
                r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)')
            date_part = self._search_regex(
                date_patterns, text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text

768

769

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API endpoint ``ep`` with retries and return the response.

    The request is retried up to the ``extractor_retries`` param (default 3)
    on network errors other than HTTP 403/429, on 'unknown error' alerts,
    and when ``check_get_keys`` is given but dict_get() finds none of those
    keys in the response.

    @param fatal: when false, failures are reported as warnings and None is
                  returned instead of raising
    @returns the API response
    """
    response = None
    last_error = None
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []

    attempt = -1
    while attempt < retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % attempt if attempt else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                cause_is_http_error = isinstance(e.cause, compat_HTTPError)
                if cause_is_http_error:
                    first_bytes = e.cause.read(512)
                    if not is_html(first_bytes):
                        # A non-HTML error body may carry a message from YouTube
                        error_body = self._webpage_read_content(
                            e.cause, None, item_id, prefix=first_bytes) or '{}'
                        yt_error = try_get(
                            self._parse_json(error_body, item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not cause_is_http_error or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if attempt < retries:
                        continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        # The request itself succeeded; the response may still carry alerts
        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                last_error = e.msg
                continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        if not check_get_keys or dict_get(response, check_get_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if attempt >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            return
    return response

@staticmethod

def is_music_url(url):

843

return re.match(r'https?://music\.youtube\.com/', url) is not None

844

845

def _extract_video(self, renderer):
    """Build a ``url``-type result for YoutubeIE from a video renderer dict.

    The result URL is the ``/shorts/`` form when the thumbnail overlay
    style is 'SHORTS' or the navigation URL contains '/shorts/', and the
    ``/watch?v=`` form otherwise. ``upload_date`` is only filled in when
    the ``approximate_date`` extractor argument of 'youtubetab' is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration mentioned in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')
    else:
        upload_date = None

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

902

IE_DESC = 'YouTube'

903

_VALID_URL = r"""(?x)^

904

(

905

(?:https?://|//) # http(s):// or protocol-independent URL

906

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

907

(?:www\.)?deturl\.com/www\.youtube\.com|

908

(?:www\.)?pwnyoutube\.com|

909

(?:www\.)?hooktube\.com|

910

(?:www\.)?yourepeat\.com|

911

tube\.majestyc\.net|

912

%(invidious)s|

913

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

914

(?:.*?\#/)? # handle anchor (#/) redirect urls

915

(?: # the various things that can precede the ID:

916

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

917

|(?: # or the v= param in all its forms

918

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

919

(?:\?|\#!?) # the params delimiter ? or # or #!

920

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

926

vid\.plus| # or vid.plus/xxxx

927

zwearz\.com/watch| # or zwearz.com/watch/xxxx

928

%(invidious)s

929

)/

930

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

931

)

932

)? # all until now is optional -> you can pass the naked ID

933

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

934

(?(1).+)? # if we found the ID, everything can follow

935

(?:\#|$)""" % {

936

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

937

}

938

_PLAYER_INFO_RE = (

939

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

940

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

941

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

942

)

943

_formats = {

944

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

945

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

946

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

947

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

948

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

949

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

950

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

951

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

952

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

953

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

954

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

955

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

956

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

957

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

958

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

959

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

960

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

961

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

966

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

967

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

968

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

969

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

970

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

971

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

972

973

# Apple HTTP Live Streaming

974

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

975

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

976

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

977

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

978

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

979

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

980

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

981

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

982

983

# DASH mp4 video

984

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

985

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

986

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

987

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

988

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

989

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

990

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

991

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

992

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

993

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

994

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

995

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

996

997

# Dash mp4 audio

998

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

999

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

1000

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

1001

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1002

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1003

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

1004

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

1005

1006

# Dash webm

1007

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1008

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1009

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1010

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1011

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1012

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1013

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1014

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1015

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1016

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1017

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1018

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1019

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1020

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1021

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1022

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1023

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1024

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1025

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1026

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1027

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1028

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1029

1030

# Dash webm audio

1031

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1032

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1033

1034

# Dash webm audio with opus inside

1035

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1036

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1037

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1038

1039

# RTMP (unnamed)

1040

'_rtmp': {'protocol': 'rtmp'},

1041

1042

# av01 video only formats sometimes served with "unknown" codecs

1043

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1044

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1045

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1046

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1047

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1048

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1049

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1050

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1051

}

1052

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1064

'uploader': 'Philipp Hagemeister',

1065

'uploader_id': 'phihag',

1066

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1067

'channel': 'Philipp Hagemeister',

1068

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1069

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1070

'upload_date': '20121002',

1071

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1072

'categories': ['Science & Technology'],

1073

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1078

'playable_in_embed': True,

1079

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1080

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1089

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1094

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1095

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1096

'uploader': 'SET India',

1097

'uploader_id': 'setindia',

1098

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1099

'age_limit': 18,

1100

},

1101

'skip': 'Private video',

1102

},

1103

{

1104

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1105

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1110

'uploader': 'Philipp Hagemeister',

1111

'uploader_id': 'phihag',

1112

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1113

'channel': 'Philipp Hagemeister',

1114

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1115

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1116

'upload_date': '20121002',

1117

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1118

'categories': ['Science & Technology'],

1119

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1124

'playable_in_embed': True,

1125

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1126

'live_status': 'not_live',

1127

'age_limit': 0,

1128

'channel_follower_count': int

1129

},

1130

'params': {

1131

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1136

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1141

'uploader_id': '8KVIDEO',

1142

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1143

'description': '',

1144

'uploader': '8KVIDEO',

1145

'title': 'UHDTV TEST 8K VIDEO.mp4'

1146

},

1147

'params': {

1148

'youtube_include_dash_manifest': True,

1149

'format': '141',

1150

},

1151

'skip': 'format 141 not served anymore',

1152

},

1153

# DASH manifest with encrypted signature

1154

{

1155

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1160

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1161

'duration': 244,

1162

'uploader': 'AfrojackVEVO',

1163

'uploader_id': 'AfrojackVEVO',

1164

'upload_date': '20131011',

1165

'abr': 129.495,

1166

'like_count': int,

1167

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1168

'playable_in_embed': True,

1169

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1170

'view_count': int,

1171

'track': 'The Spark',

1172

'live_status': 'not_live',

1173

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1174

'channel': 'Afrojack',

1175

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1176

'tags': 'count:19',

1177

'availability': 'public',

1178

'categories': ['Music'],

1179

'age_limit': 0,

1180

'alt_title': 'The Spark',

1181

'channel_follower_count': int

1182

},

1183

'params': {

1184

'youtube_include_dash_manifest': True,

1185

'format': '141/bestaudio[ext=m4a]',

1186

},

1187

},

1188

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1189

{

1190

'note': 'Embed allowed age-gate video',

1191

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1196

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1197

'duration': 142,

1198

'uploader': 'The Witcher',

1199

'uploader_id': 'WitcherGame',

1200

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1201

'upload_date': '20140605',

1202

'age_limit': 18,

1203

'categories': ['Gaming'],

1204

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1205

'availability': 'needs_auth',

1206

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1207

'like_count': int,

1208

'channel': 'The Witcher',

1209

'live_status': 'not_live',

1210

'tags': 'count:17',

1211

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1212

'playable_in_embed': True,

1213

'view_count': int,

1214

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1219

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1224

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1225

'upload_date': '20200408',

1226

'uploader_id': 'FlyingKitty900',

1227

'uploader': 'FlyingKitty',

1228

'age_limit': 18,

1229

'availability': 'needs_auth',

1230

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1231

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1232

'channel': 'FlyingKitty',

1233

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1234

'view_count': int,

1235

'categories': ['Entertainment'],

1236

'live_status': 'not_live',

1237

'tags': ['Flyingkitty', 'godzilla 2'],

1238

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1239

'like_count': int,

1240

'duration': 177,

1241

'playable_in_embed': True,

1242

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1247

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1248

'info_dict': {

1249

'id': 'Tq92D6wQ1mg',

1250

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1251

'ext': 'mp4',

1252

'upload_date': '20191228',

1253

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1254

'uploader': 'Projekt Melody',

1255

'description': 'md5:17eccca93a786d51bc67646756894066',

1256

'age_limit': 18,

1257

'like_count': int,

1258

'availability': 'needs_auth',

1259

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1260

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1261

'view_count': int,

1262

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1263

'channel': 'Projekt Melody',

1264

'live_status': 'not_live',

1265

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1266

'playable_in_embed': True,

1267

'categories': ['Entertainment'],

1268

'duration': 106,

1269

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1270

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1275

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1280

'uploader': 'Herr Lurik',

1281

'uploader_id': 'st3in234',

1282

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1283

'upload_date': '20130730',

1284

'track': 'Such mich find mich',

1285

'age_limit': 0,

1286

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1287

'like_count': int,

1288

'playable_in_embed': False,

1289

'creator': 'OOMPH!',

1290

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1291

'view_count': int,

1292

'alt_title': 'Such mich find mich',

1293

'duration': 210,

1294

'channel': 'Herr Lurik',

1295

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1296

'categories': ['Music'],

1297

'availability': 'public',

1298

'uploader_url': 'http://www.youtube.com/user/st3in234',

1299

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1300

'live_status': 'not_live',

1301

'artist': 'OOMPH!',

1302

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1307

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1308

'only_matching': True,

1309

},

1310

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1311

# YouTube Red ad is not captured for creator

1312

{

1313

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1319

'uploader_id': 'deadmau5',

1320

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1321

'creator': 'deadmau5',

1322

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1323

'uploader': 'deadmau5',

1324

'title': 'Deadmau5 - Some Chords (HD)',

1325

'alt_title': 'Some Chords',

1326

'availability': 'public',

1327

'tags': 'count:14',

1328

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1329

'view_count': int,

1330

'live_status': 'not_live',

1331

'channel': 'deadmau5',

1332

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1333

'like_count': int,

1334

'track': 'Some Chords',

1335

'artist': 'deadmau5',

1336

'playable_in_embed': True,

1337

'age_limit': 0,

1338

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1339

'categories': ['Music'],

1340

'album': 'Some Chords',

1341

'channel_follower_count': int

1342

},

1343

'expected_warnings': [

1344

'DASH manifest missing',

1345

]

1346

},

1347

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1348

{

1349

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1355

'uploader_id': 'olympic',

1356

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1357

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1358

'uploader': 'Olympics',

1359

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1360

'like_count': int,

1361

'release_timestamp': 1343767800,

1362

'playable_in_embed': True,

1363

'categories': ['Sports'],

1364

'release_date': '20120731',

1365

'channel': 'Olympics',

1366

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1367

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1368

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1369

'age_limit': 0,

1370

'availability': 'public',

1371

'live_status': 'was_live',

1372

'view_count': int,

1373

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1374

'channel_follower_count': int

1375

},

1376

'params': {

1377

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1387

'duration': 85,

1388

'upload_date': '20110310',

1389

'uploader_id': 'AllenMeow',

1390

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1391

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1392

'uploader': '孫ᄋᄅ',

1393

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1394

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1399

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1400

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1401

'view_count': int,

1402

'categories': ['People & Blogs'],

1403

'like_count': int,

1404

'live_status': 'not_live',

1405

'availability': 'unlisted',

1406

'channel_follower_count': int

1407

},

1408

},

1409

# url_encoded_fmt_stream_map is empty string

1410

{

1411

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1416

'description': '',

1417

'upload_date': '20150404',

1418

'uploader_id': 'spbelect',

1419

'uploader': 'Наблюдатели Петербурга',

1420

},

1421

'params': {

1422

'skip_download': 'requires avconv',

1423

},

1424

'skip': 'This live event has ended.',

1425

},

1426

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1427

{

1428

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1433

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1434

'duration': 220,

1435

'upload_date': '20150625',

1436

'uploader_id': 'dorappi2000',

1437

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1438

'uploader': 'dorappi2000',

1439

'formats': 'mincount:31',

1440

},

1441

'skip': 'not actual anymore',

1442

},

1443

# DASH manifest with segment_list

1444

{

1445

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1446

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1451

'uploader': 'Airtek',

1452

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1453

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1454

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1455

},

1456

'params': {

1457

'youtube_include_dash_manifest': True,

1458

'format': '135', # bestvideo

1459

},

1460

'skip': 'This live event has ended.',

1461

},

1462

{

1463

# Multifeed videos (multiple cameras), URL is for Main Camera

1464

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1465

'info_dict': {

1466

'id': 'jvGDaLqkpTg',

1467

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1468

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1475

'description': 'md5:e03b909557865076822aa169218d6a5d',

1476

'duration': 10643,

1477

'upload_date': '20161111',

1478

'uploader': 'Team PGP',

1479

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1480

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1487

'description': 'md5:e03b909557865076822aa169218d6a5d',

1488

'duration': 10991,

1489

'upload_date': '20161111',

1490

'uploader': 'Team PGP',

1491

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1492

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1499

'description': 'md5:e03b909557865076822aa169218d6a5d',

1500

'duration': 10995,

1501

'upload_date': '20161111',

1502

'uploader': 'Team PGP',

1503

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1504

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1511

'description': 'md5:e03b909557865076822aa169218d6a5d',

1512

'duration': 10990,

1513

'upload_date': '20161111',

1514

'uploader': 'Team PGP',

1515

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1516

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1521

},

1522

'skip': 'Not multifeed anymore',

1523

},

1524

{

1525

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1526

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1527

'info_dict': {

1528

'id': 'gVfLd0zydlo',

1529

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1530

},

1531

'playlist_count': 2,

1532

'skip': 'Not multifeed anymore',

1533

},

1534

{

1535

'url': 'https://vid.plus/FlRa-iH7PGw',

1536

'only_matching': True,

1537

},

1538

{

1539

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1540

'only_matching': True,

1541

},

1542

{

1543

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1544

# Also tests cut-off URL expansion in video description (see

1545

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1546

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1547

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1552

'alt_title': 'Dark Walk',

1553

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1554

'duration': 133,

1555

'upload_date': '20151119',

1556

'uploader_id': 'IronSoulElf',

1557

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1558

'uploader': 'IronSoulElf',

1559

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1560

'track': 'Dark Walk',

1561

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1562

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1563

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1564

'categories': ['Film & Animation'],

1565

'view_count': int,

1566

'live_status': 'not_live',

1567

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1568

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1569

'tags': 'count:13',

1570

'availability': 'public',

1571

'channel': 'IronSoulElf',

1572

'playable_in_embed': True,

1573

'like_count': int,

1574

'age_limit': 0,

1575

'channel_follower_count': int

1576

},

1577

'params': {

1578

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1583

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1584

'only_matching': True,

1585

},

1586

{

1587

# Video with yt:stretch=17:0

1588

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1593

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1594

'upload_date': '20151107',

1595

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1596

'uploader': 'CH GAMER DROID',

1597

},

1598

'params': {

1599

'skip_download': True,

1600

},

1601

'skip': 'This video does not exist.',

1602

},

1603

{

1604

# Video with incomplete 'yt:stretch=16:'

1605

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1606

'only_matching': True,

1607

},

1608

{

1609

# Video licensed under Creative Commons

1610

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1615

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1616

'duration': 721,

1617

'upload_date': '20150128',

1618

'uploader_id': 'BerkmanCenter',

1619

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1620

'uploader': 'The Berkman Klein Center for Internet & Society',

1621

'license': 'Creative Commons Attribution license (reuse allowed)',

1622

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1623

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1624

'like_count': int,

1625

'age_limit': 0,

1626

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1627

'channel': 'The Berkman Klein Center for Internet & Society',

1628

'availability': 'public',

1629

'view_count': int,

1630

'categories': ['Education'],

1631

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1632

'live_status': 'not_live',

1633

'playable_in_embed': True,

1634

'channel_follower_count': int

1635

},

1636

'params': {

1637

'skip_download': True,

},

},

{

# Channel-like uploader_url

1642

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1647

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1648

'duration': 4060,

1649

'upload_date': '20151120',

1650

'uploader': 'Bernie Sanders',

1651

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1652

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1653

'license': 'Creative Commons Attribution license (reuse allowed)',

1654

'playable_in_embed': True,

1655

'tags': 'count:12',

1656

'like_count': int,

1657

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1658

'age_limit': 0,

1659

'availability': 'public',

1660

'categories': ['News & Politics'],

1661

'channel': 'Bernie Sanders',

1662

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1663

'view_count': int,

1664

'live_status': 'not_live',

1665

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1666

'channel_follower_count': int

1667

},

1668

'params': {

1669

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1674

'only_matching': True,

1675

},

1676

{

1677

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1678

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1679

'only_matching': True,

1680

},

1681

{

1682

# Rental video preview

1683

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1688

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1689

'upload_date': '20150811',

1690

'uploader': 'FlixMatrix',

1691

'uploader_id': 'FlixMatrixKaravan',

1692

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1693

'license': 'Standard YouTube License',

1694

},

1695

'params': {

1696

'skip_download': True,

1697

},

1698

'skip': 'This video is not available.',

1699

},

1700

{

1701

# YouTube Red video with episode data

1702

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1707

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1708

'duration': 2085,

1709

'upload_date': '20170118',

1710

'uploader': 'Vsauce',

1711

'uploader_id': 'Vsauce',

1712

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1713

'series': 'Mind Field',

1714

'season_number': 1,

1715

'episode_number': 1,

1716

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1717

'tags': 'count:12',

1718

'view_count': int,

1719

'availability': 'public',

1720

'age_limit': 0,

1721

'channel': 'Vsauce',

1722

'episode': 'Episode 1',

1723

'categories': ['Entertainment'],

1724

'season': 'Season 1',

1725

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1726

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1727

'like_count': int,

1728

'playable_in_embed': True,

1729

'live_status': 'not_live',

1730

'channel_follower_count': int

1731

},

1732

'params': {

1733

'skip_download': True,

1734

},

1735

'expected_warnings': [

1736

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1741

# as inappropriate or offensive to some audiences.

1742

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1747

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1748

'duration': 965,

1749

'upload_date': '20140124',

1750

'uploader': 'New Century Foundation',

1751

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1752

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1753

},

1754

'params': {

1755

'skip_download': True,

1756

},

1757

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1762

'only_matching': True,

1763

},

1764

{

1765

# geo restricted to JP

1766

'url': 'sJL6WA-aGkQ',

1767

'only_matching': True,

1768

},

1769

{

1770

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1771

'only_matching': True,

1772

},

1773

{

1774

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1775

'only_matching': True,

1776

},

1777

{

1778

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1779

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1780

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1785

'only_matching': True,

1786

},

1787

{

1788

# Video with unsupported adaptive stream type formats

1789

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1794

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1795

'duration': 433,

1796

'upload_date': '20130923',

1797

'uploader': 'Amelia Putri Harwita',

1798

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1799

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1800

'formats': 'maxcount:10',

1801

},

1802

'params': {

1803

'skip_download': True,

1804

'youtube_include_dash_manifest': False,

1805

},

1806

'skip': 'not actual anymore',

1807

},

1808

{

1809

# Youtube Music Auto-generated description

1810

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1815

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1816

'upload_date': '20190312',

1817

'uploader': 'Stephen - Topic',

1818

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1819

'artist': 'Stephen',

1820

'track': 'Voyeur Girl',

1821

'album': 'it\'s too much love to know my dear',

1822

'release_date': '20190313',

1823

'release_year': 2019,

1824

'alt_title': 'Voyeur Girl',

1825

'view_count': int,

1826

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1827

'playable_in_embed': True,

1828

'like_count': int,

1829

'categories': ['Music'],

1830

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1831

'channel': 'Stephen',

1832

'availability': 'public',

1833

'creator': 'Stephen',

1834

'duration': 169,

1835

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1836

'age_limit': 0,

1837

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1838

'tags': 'count:11',

1839

'live_status': 'not_live',

1840

'channel_follower_count': int

1841

},

1842

'params': {

1843

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1848

'only_matching': True,

1849

},

1850

{

1851

# invalid -> valid video id redirection

1852

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1857

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1858

'upload_date': '20090125',

1859

'uploader': 'Prochorowka',

1860

'uploader_id': 'Prochorowka',

1861

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1862

'artist': 'Panjabi MC',

1863

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1864

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1865

},

1866

'params': {

1867

'skip_download': True,

1868

},

1869

'skip': 'Video unavailable',

1870

},

1871

{

1872

# empty description results in an empty string

1873

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1880

'uploader_id': 'ElevageOrVert',

1881

'uploader': 'ElevageOrVert',

1882

'view_count': int,

1883

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1884

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1885

'like_count': int,

1886

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1887

'tags': [],

1888

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1889

'availability': 'public',

1890

'age_limit': 0,

1891

'categories': ['Pets & Animals'],

1892

'duration': 7,

1893

'playable_in_embed': True,

1894

'live_status': 'not_live',

1895

'channel': 'ElevageOrVert',

1896

'channel_follower_count': int

1897

},

1898

'params': {

1899

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1904

# see [2] for an example with '};' inside ytInitialPlayerResponse

1905

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1906

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1907

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1912

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1913

'upload_date': '20130831',

1914

'uploader_id': 'kudvenkat',

1915

'uploader': 'kudvenkat',

1916

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1917

'like_count': int,

1918

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1919

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1920

'live_status': 'not_live',

1921

'categories': ['Education'],

1922

'availability': 'public',

1923

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1924

'tags': 'count:12',

1925

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1930

'channel_follower_count': int

1931

},

1932

'params': {

1933

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1938

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1939

'only_matching': True,

1940

},

1941

{

1942

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1943

'only_matching': True,

1944

},

1945

{

1946

# https://github.com/ytdl-org/youtube-dl/pull/28094

1947

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1953

'upload_date': '20141120',

1954

'uploader': 'The Cinematic Orchestra - Topic',

1955

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1956

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1957

'artist': 'The Cinematic Orchestra',

1958

'track': 'Burn Out',

1959

'album': 'Every Day',

1960

'like_count': int,

1961

'live_status': 'not_live',

1962

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1967

'creator': 'The Cinematic Orchestra',

1968

'channel': 'The Cinematic Orchestra',

1969

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1970

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1971

'availability': 'public',

1972

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1973

'categories': ['Music'],

1974

'playable_in_embed': True,

1975

'channel_follower_count': int

1976

},

1977

'params': {

1978

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1983

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1984

'only_matching': True,

1985

},

1986

{

1987

# controversial video, requires bpctr/contentCheckOk

1988

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1993

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1994

'uploader': 'CBS Mornings',

1995

'uploader_id': 'CBSThisMorning',

1996

'upload_date': '20140716',

1997

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1998

'duration': 170,

1999

'categories': ['News & Politics'],

2000

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2001

'view_count': int,

2002

'channel': 'CBS Mornings',

2003

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2004

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2005

'age_limit': 18,

2006

'availability': 'needs_auth',

2007

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2008

'like_count': int,

2009

'live_status': 'not_live',

2010

'playable_in_embed': True,

2011

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2016

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2021

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2022

'upload_date': '20201120',

2023

'uploader': 'Walk around Japan',

2024

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2025

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2026

'duration': 1456,

2027

'categories': ['Travel & Events'],

2028

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2029

'view_count': int,

2030

'channel': 'Walk around Japan',

2031

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2032

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2033

'age_limit': 0,

2034

'availability': 'public',

2035

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2036

'live_status': 'not_live',

2037

'playable_in_embed': True,

2038

'channel_follower_count': int

2039

},

2040

'params': {

2041

'skip_download': True,

2042

},

2043

}, {

2044

# Has multiple audio streams

2045

'url': 'WaOKSUlf4TM',

2046

'only_matching': True

2047

}, {

2048

# Requires Premium: has format 141 when requested using YTM url

2049

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2050

'only_matching': True

2051

}, {

2052

# multiple subtitles with same lang_code

2053

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2054

'only_matching': True,

2055

}, {

2056

# Force use android client fallback

2057

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2058

'info_dict': {

2059

'id': 'YOelRv7fMxY',

2060

'title': 'DIGGING A SECRET TUNNEL Part 1',

2061

'ext': '3gp',

2062

'upload_date': '20210624',

2063

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2064

'uploader': 'colinfurze',

2065

'uploader_id': 'colinfurze',

2066

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2067

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2068

'duration': 596,

2069

'categories': ['Entertainment'],

2070

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2071

'view_count': int,

2072

'channel': 'colinfurze',

2073

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2074

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2075

'age_limit': 0,

2076

'availability': 'public',

2077

'like_count': int,

2078

'live_status': 'not_live',

2079

'playable_in_embed': True,

2080

'channel_follower_count': int

2081

},

2082

'params': {

2083

'format': '17', # 3gp format available on android

2084

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2089

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2090

'only_matching': True,

2091

'params': {

2092

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2097

'only_matching': True,

2098

}, {

2099

'note': 'Storyboards',

2100

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2106

'uploader_id': 'scishow',

2107

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2108

'upload_date': '20140324',

2109

'uploader': 'SciShow',

2110

'like_count': int,

2111

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2112

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2113

'view_count': int,

2114

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2115

'playable_in_embed': True,

2116

'tags': 'count:12',

2117

'uploader_url': 'http://www.youtube.com/user/scishow',

2118

'availability': 'public',

2119

'channel': 'SciShow',

2120

'live_status': 'not_live',

2121

'duration': 248,

2122

'categories': ['Education'],

2123

'age_limit': 0,

2124

'channel_follower_count': int

2125

}, 'params': {'format': 'mhtml', 'skip_download': True}

2126

}, {

2127

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2128

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2133

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2134

'uploader': 'Leon Nguyen',

2135

'uploader_id': 'VNSXIII',

2136

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2137

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2138

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2143

'tags': 'count:23',

2144

'playable_in_embed': True,

2145

'live_status': 'not_live',

2146

'upload_date': '20220103',

2147

'like_count': int,

2148

'availability': 'public',

2149

'channel': 'Leon Nguyen',

2150

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2151

'channel_follower_count': int

2152

}

2153

}, {

2154

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2155

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2160

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2161

'uploader': 'Quackity',

2162

'uploader_id': 'QuackityHQ',

2163

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2164

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2165

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2170

'tags': 'count:26',

2171

'playable_in_embed': True,

2172

'live_status': 'not_live',

2173

'release_timestamp': 1641172509,

2174

'release_date': '20220103',

2175

'upload_date': '20220103',

2176

'like_count': int,

2177

'availability': 'public',

2178

'channel': 'Quackity',

2179

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2180

'channel_follower_count': int

2181

}

2182

},

2183

{ # continuous livestream. Microformat upload date should be preferred.

2184

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2185

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2186

'info_dict': {

2187

'id': 'kgx4WGK0oNU',

2188

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2189

'ext': 'mp4',

2190

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2191

'availability': 'public',

2192

'age_limit': 0,

2193

'release_timestamp': 1637975704,

2194

'upload_date': '20210619',

2195

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2196

'live_status': 'is_live',

2197

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2198

'uploader': '阿鲍Abao',

2199

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2200

'channel': 'Abao in Tokyo',

2201

'channel_follower_count': int,

2202

'release_date': '20211127',

2203

'tags': 'count:39',

2204

'categories': ['People & Blogs'],

2205

'like_count': int,

2206

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2207

'view_count': int,

2208

'playable_in_embed': True,

2209

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2210

},

2211

'params': {'skip_download': True}

2212

}, {

2213

# Story. Requires specific player params to work.

2214

# Note: stories get removed after some period of time

2215

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2220

'view_count': int,

2221

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2222

'upload_date': '20220526',

2223

'categories': ['Education'],

2224

'title': 'Story',

2225

'channel': 'IT\'S HISTORY',

2226

'description': '',

2227

'uploader_id': 'BlastfromthePast',

2228

'duration': 12,

2229

'uploader': 'IT\'S HISTORY',

2230

'playable_in_embed': True,

2231

'age_limit': 0,

2232

'live_status': 'not_live',

2233

'tags': [],

2234

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2235

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2236

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2237

}

2238

}, {

2239

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2244

'upload_date': '20220323',

2245

'like_count': int,

2246

'availability': 'unlisted',

2247

'channel': 'nao20010128nao',

2248

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2249

'age_limit': 0,

2250

'uploader': 'nao20010128nao',

2251

'uploader_id': 'nao20010128nao',

2252

'categories': ['Music'],

2253

'view_count': int,

2254

'description': '',

2255

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2256

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2257

'live_status': 'not_live',

2258

'playable_in_embed': True,

2259

'channel_follower_count': int,

2260

'duration': 6,

2261

'tags': [],

2262

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

}

}

]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle `url`.

    URLs whose query string carries a non-empty `list` value are rejected;
    every other URL is decided by the parent class's `suitable()`.
    """
    # NOTE(review): parse_qs is also imported at module level. The
    # function-local import is kept exactly as in the original - confirm
    # nothing relies on it before removing it.
    from ..utils import parse_qs

    playlist_id = parse_qs(url).get('list', [None])[0]
    if playlist_id:
        return False
    return super().suitable(url)

2275

2276

def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player code (see _load_player)
    # _player_cache: cache keys -> signature/nsig functions and decrypted values
    self._code_cache, self._player_cache = {}, {}

2280

2281

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):

2282

lock = threading.Lock()

2283

2284

is_live = True

2285

start_time = time.time()

2286

formats = [f for f in formats if f.get('is_from_start')]

2287

2288

def refetch_manifest(format_id, delay):

2289

nonlocal formats, start_time, is_live

2290

if time.time() <= start_time + delay:

2291

return

2292

2293

_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

2294

video_details = traverse_obj(

2295

prs, (..., 'videoDetails'), expected_type=dict, default=[])

2296

microformats = traverse_obj(

2297

prs, (..., 'microformat', 'playerMicroformatRenderer'),

2298

expected_type=dict, default=[])

2299

_, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)

2300

start_time = time.time()

2301

2302

def mpd_feed(format_id, delay):

2303

"""

2304

@returns (manifest_url, manifest_stream_number, is_live) or None

2305

"""

2306

with lock:

2307

refetch_manifest(format_id, delay)

2308

2309

f = next((f for f in formats if f['format_id'] == format_id), None)

2310

if not f:

2311

if not is_live:

2312

self.to_screen(f'{video_id}: Video is no longer live')

2313

else:

2314

self.report_warning(

2315

f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')

2316

return None

2317

return f['manifest_url'], f['manifest_stream_number'], is_live

for f in formats:

f['is_live'] = True

f['protocol'] = 'http_dash_segments_generator'

2322

f['fragments'] = functools.partial(

2323

self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)

2324

2325

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):

2326

FETCH_SPAN, MAX_DURATION = 5, 432000

2327

2328

mpd_url, stream_number, is_live = None, None, True

2329

2330

begin_index = 0

2331

download_start_time = ctx.get('start') or time.time()

2332

2333

lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION

2334

if lack_early_segments:

2335

self.report_warning(bug_reports_message(

2336

'Starting download from the last 120 hours of the live stream since '

2337

'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

2338

lack_early_segments = True

2339

2340

known_idx, no_fragment_score, last_segment_url = begin_index, 0, None

2341

fragments, fragment_base_url = None, None

2342

2343

def _extract_sequence_from_mpd(refresh_sequence, immediate):

2344

nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url

2345

# Obtain from MPD's maximum seq value

2346

old_mpd_url = mpd_url

2347

last_error = ctx.pop('last_error', None)

2348

expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403

2349

mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)

2350

or (mpd_url, stream_number, False))

2351

if not refresh_sequence:

2352

if expire_fast and not is_live:

2353

return False, last_seq

2354

elif old_mpd_url == mpd_url:

2355

return True, last_seq

2356

try:

2357

fmts, _ = self._extract_mpd_formats_and_subtitles(

2358

mpd_url, None, note=False, errnote=False, fatal=False)

2359

except ExtractorError:

2360

fmts = None

2361

if not fmts:

2362

no_fragment_score += 2

2363

return False, last_seq

2364

fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)

2365

fragments = fmt_info['fragments']

2366

fragment_base_url = fmt_info['fragment_base_url']

2367

assert fragment_base_url

2368

2369

_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))

2370

return True, _last_seq

2371

2372

while is_live:

2373

fetch_time = time.time()

2374

if no_fragment_score > 30:

2375

return

2376

if last_segment_url:

2377

# Obtain from "X-Head-Seqnum" header value from each segment

2378

try:

2379

urlh = self._request_webpage(

2380

last_segment_url, None, note=False, errnote=False, fatal=False)

2381

except ExtractorError:

2382

urlh = None

2383

last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))

2384

if last_seq is None:

2385

no_fragment_score += 2

2386

last_segment_url = None

2387

continue

2388

else:

2389

should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)

2390

no_fragment_score += 2

2391

if not should_continue:

2392

continue

2393

2394

if known_idx > last_seq:

2395

last_segment_url = None

continue

last_seq += 1

if begin_index < 0 and known_idx < 0:

2401

# skip from the start when it's negative value

2402

known_idx = last_seq + begin_index

2403

if lack_early_segments:

2404

known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))

2405

try:

2406

for idx in range(known_idx, last_seq):

2407

# do not update sequence here or you'll get skipped some part of it

2408

should_continue, _ = _extract_sequence_from_mpd(False, False)

2409

if not should_continue:

2410

known_idx = idx - 1

2411

raise ExtractorError('breaking out of outer loop')

2412

last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)

2413

yield {

2414

'url': last_segment_url,

2415

}

2416

if known_idx == last_seq:

2417

no_fragment_score += 5

2418

else:

2419

no_fragment_score = 0

2420

known_idx = last_seq

2421

except ExtractorError:

2422

continue

2423

2424

time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2425

2426

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the player JS URL found in any of `ytcfgs`, or None.

    The configs are searched for `PLAYER_JS_URL` and then for a `jsUrl`
    under `WEB_PLAYER_CONTEXT_CONFIGS`; a hit is joined onto
    'https://www.youtube.com'. The `webpage` argument is accepted but not
    used by this method.
    """
    search_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    found = traverse_obj(ytcfgs, *search_paths, get_all=False, expected_type=compat_str)
    return urljoin('https://www.youtube.com', found) if found else None

2433

2434

def _download_player_url(self, video_id, fatal=False):

2435

res = self._download_webpage(

2436

'https://www.youtube.com/iframe_api',

2437

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2438

if res:

2439

player_version = self._search_regex(

2440

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2441

if player_version:

2442

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2443

2444

def _signature_cache_id(self, example_sig):

2445

""" Return a string representation of a signature """

2446

return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))

2447

2448

@classmethod
def _extract_player_info(cls, player_url):
    """Return the `id` group of the first `_PLAYER_INFO_RE` pattern found in `player_url`.

    Raises ExtractorError when none of the patterns matches.
    """
    # Lazy generator: patterns after the first hit are never searched
    attempts = (re.search(player_re, player_url) for player_re in cls._PLAYER_INFO_RE)
    id_m = next((m for m in attempts if m), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')

2457

2458

def _load_player(self, video_id, player_url, fatal=True):

2459

player_id = self._extract_player_info(player_url)

2460

if player_id not in self._code_cache:

2461

code = self._download_webpage(

2462

player_url, video_id, fatal=fatal,

2463

note='Downloading player ' + player_id,

2464

errnote='Download of %s failed' % player_url)

2465

if code:

2466

self._code_cache[player_id] = code

2467

return self._code_cache.get(player_id)

2468

2469

def _extract_signature_function(self, video_id, player_url, example_sig):

2470

player_id = self._extract_player_info(player_url)

2471

2472

# Read from filesystem cache

2473

func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'

2474

assert os.path.basename(func_id) == func_id

2475

2476

cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)

2477

if cache_spec is not None:

2478

return lambda s: ''.join(s[i] for i in cache_spec)

2479

2480

code = self._load_player(video_id, player_url)

2481

if code:

2482

res = self._parse_sig_js(code)

2483

2484

test_string = ''.join(map(compat_chr, range(len(example_sig))))

2485

cache_res = res(test_string)

2486

cache_spec = [ord(c) for c in cache_res]

2487

2488

self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)

2489

return res

2490

2491

def _print_sig_code(self, func, example_sig):

2492

if not self.get_param('youtube_print_sig_code'):

2493

return

2494

2495

def gen_sig_code(idxs):

2496

def _genslice(start, end, step):

2497

starts = '' if start == 0 else str(start)

2498

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2499

steps = '' if step == 1 else (':%d' % step)

2500

return f's[{starts}{ends}{steps}]'

2501

2502

step = None

2503

# Quelch pyflakes warnings - start will be set when step is set

2504

start = '(Never used)'

2505

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2510

step = None

2511

continue

2512

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2522

2523

test_string = ''.join(map(compat_chr, range(len(example_sig))))

2524

cache_res = func(test_string)

2525

cache_spec = [ord(c) for c in cache_res]

2526

expr_code = ' + '.join(gen_sig_code(cache_spec))

2527

signature_id_tuple = '(%s)' % (

2528

', '.join(compat_str(len(p)) for p in example_sig.split('.')))

2529

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2530

' return %s\n') % (signature_id_tuple, expr_code)

2531

self.to_screen('Extracted signature function:\n' + code)

2532

2533

def _parse_sig_js(self, jscode):

2534

funcname = self._search_regex(

2535

(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2536

r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2537

r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})$decodeURIComponent\(h\.s$\)',

2538

r'\bc&&$c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c$\)',

2539

r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function$\s*a\s*$\s*{\s*a\s*=\s*a\.split$\s*""\s*$;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}$a,\d+$',

2540

r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function$\s*a\s*$\s*{\s*a\s*=\s*a\.split$\s*""\s*$',

2541

r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function$\s*a\s*$\s*{\s*a\s*=\s*a\.split$\s*""\s*$',

2542

# Obsolete patterns

2543

r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2544

r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',

2545

r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2546

r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2547

r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2548

r'\bc\s*&&\s*a\.set$[^,]+\s*,\s*\([^)]*$\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2549

r'\bc\s*&&\s*[a-zA-Z0-9]+\.set$[^,]+\s*,\s*\([^)]*$\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2550

r'\bc\s*&&\s*[a-zA-Z0-9]+\.set$[^,]+\s*,\s*\([^)]*$\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),

2551

jscode, 'Initial JS player signature function name', group='sig')

2552

2553

jsi = JSInterpreter(jscode)

2554

initial_function = jsi.extract_function(funcname)

2555

return lambda s: initial_function([s])

2556

2557

def _decrypt_signature(self, s, video_id, player_url):

2558

"""Turn the encrypted s field into a working signature"""

2559

try:

2560

player_id = (player_url, self._signature_cache_id(s))

2561

if player_id not in self._player_cache:

2562

func = self._extract_signature_function(video_id, player_url, s)

2563

self._player_cache[player_id] = func

2564

func = self._player_cache[player_id]

2565

self._print_sig_code(func, s)

2566

return func(s)

2567

except Exception as e:

2568

raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2569

2570

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    # Decrypted values are cached per input value
    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        # The decrypting function itself is cached per player URL
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        n_func = self._player_cache[func_key]
        self._player_cache[value_key] = n_func(s)
        decrypted = self._player_cache[value_key]
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as err:
        raise ExtractorError(traceback.format_exc(), cause=err, video_id=video_id)

2590

2591

def _extract_n_function_name(self, jscode):

2592

nfunc, idx = self._search_regex(

2593

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

2594

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

2595

if not idx:

2596

return nfunc

2597

return json.loads(js_to_json(self._search_regex(

2598

rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,

2599

f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2600

2601

def _extract_n_function(self, video_id, player_url):
    """Return a callable applying the player's "n" function to one value.

    The function code is loaded from the 'youtube-nsig' cache section by
    player id. On a miss, the player is loaded, the function is extracted
    with JSInterpreter and the code is stored back into the cache. When
    'youtube_print_sig_code' is set, element 1 of the function code is
    printed.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The function is rebuilt from its code on every call, as before
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function

2618

2619

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of `ytcfg` is preferred when `ytcfg` is a dict. Otherwise
    the player code is searched for a five-digit `signatureTimestamp`/`sts`
    value. Without a `player_url` this raises ExtractorError when `fatal`
    is set and otherwise warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        # Hand back whatever (falsy) value the ytcfg lookup produced
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))

2642

2643

def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL so that the video is marked as watched.

    The URL is taken from `playbackTracking.videostatsPlaybackUrl.baseUrl`
    of the player responses. Its query gets `ver=2` and a random 16-character
    `cpn` before it is downloaded non-fatally. Warns and returns when no
    such URL is present.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # randint() includes its upper bound. 255 keeps the masked value uniform
    # over the 64 letters, whereas 256 & 63 == 0 would favour the first one.
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 255) & 63] for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2668

2669

@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds from the HTML in `webpage`.

    Three sources are scanned, in this order: player embed URLs, lazyYT
    `data-youtube-id` values and "YouTube Video Importer" `data-video_id`
    values. Returns one list; the first two kinds are HTML-unescaped.
    """
    # Embedded YouTube player
    # NOTE(review): `embedSWF\(?:\s*` is reproduced unchanged; it reads as if
    # `embedSWF\(\s*` may have been intended - confirm before touching it.
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = [unescapeHTML(found.group('url')) for found in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(unescapeHTML(lazy_id) for lazy_id in lazy_ids)

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    # findall() returns one tuple per match; the video id is its last group
    entries.extend(groups[-1] for groups in re.findall(importer_re, webpage))

    return entries

@staticmethod
def _extract_url(webpage):
    """Return the first entry of `YoutubeIE._extract_urls(webpage)`, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]

2705

2706

@classmethod
def extract_id(cls, url):
    """Return the `id` group of `url` matched against `_VALID_URL` (verbose mode).

    Raises ExtractorError when the URL does not match.
    """
    matched = re.match(cls._VALID_URL, url, flags=re.VERBOSE)
    if matched is not None:
        return matched.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2712

2713

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in `data`.

    The chapter list is read from the `chapteredPlayerBarRenderer` under
    `playerOverlays`; start time and title come from each entry's
    `chapterRenderer`. The result is whatever `_extract_chapters` returns.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_of(chapter):
        # timeRangeStartMillis is handed to float_or_none with scale=1000
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    raw_chapters = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        raw_chapters, chapter_time=start_of, chapter_title=title_of, duration=duration)

2727

2728

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists of the engagement panels.

    Every `macroMarkersListRenderer` content list is tried in order; the
    first one for which `_extract_chapters` returns a non-empty result wins.
    Returns an empty list when none does.
    """
    panels_path = (
        'engagementPanels', ..., 'engagementPanelSectionListRenderer',
        'content', 'macroMarkersListRenderer', 'contents')
    content_list = traverse_obj(data, panels_path, expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2740

2741

def _extract_chapters_from_description(self, description, duration):

2742

return self._extract_chapters(

2743

re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),

2744

chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],

2745

duration=duration, strict=False)

2746

2747

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

2752

'title': chapter_title(chapter),

2753

} for chapter in chapter_list or []]

2754

if not strict:

2755

chapter_list.sort(key=lambda c: c['start_time'] or 0)

2756

2757

chapters = [{'start_time': 0, 'title': '<Untitled>'}]

2758

for idx, chapter in enumerate(chapter_list):

2759

if chapter['start_time'] is None or not chapter['title']:

2760

self.report_warning(f'Incomplete chapter {idx}')

2761

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

2762

chapters[-1]['end_time'] = chapter['start_time']

2763

chapters.append(chapter)

2764

else:

2765

self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')

2766

chapters[-1]['end_time'] = duration

2767

return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:]

2768

2769

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    @param comment_renderer  Renderer dict of a single comment
    @param parent            Id of the parent comment; None for top-level comments

    Returns None when the renderer has no `commentId`. In the result,
    `parent` is 'root' for top-level comments and `like_count` falls back
    to 0 when no count can be parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    # Two getters are supplied: `voteCount.simpleText` and `likeCount`
    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }

2803

2804

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator yielding comment dicts for a comment section or a reply thread.

    @param root_continuation_data  Renderer data from which the first continuation is extracted
    @param ytcfg                   ytcfg used for the API headers and requests
    @param video_id                Id of the video the comments belong to
    @param parent                  Id of the parent comment when fetching replies;
                                   None for the top-level comment section
    @param tracker                 Dict of counters shared across recursive calls;
                                   created on the first (top-level) call
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Read the comments header and return the continuation for the requested sort order"""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments (and their replies) contained in one page of items"""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consuming loop below to stop
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies both per thread and by the remaining global reply budget
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Pad with empty strings so that unspecified limits default to sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first response only carries the header; follow its sort continuation
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

2949

2950

@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id

    The token is a fixed byte template with `video_id` embedded twice,
    returned as a base64-encoded str.
    """
    token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
    return base64.b64encode(token.encode()).decode()

2957

2958

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Locates the item section identified as 'comment-item-section' in
    `contents` and returns an iterator over its comments, truncated to the
    first value of the `max_comments` extractor argument (unlimited when
    that value is not an integer). `webpage` is not used here.
    """
    def _real_comment_extract(contents):
        renderer = next((
            item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
            if item.get('sectionIdentifier') == 'comment-item-section'), None)
        yield from self._comment_entries(renderer, ytcfg, video_id)

    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, max_comments)

2968

2969

@staticmethod
def _get_checkok_params():
    """Return a fresh dict of the `contentCheckOk`/`racyCheckOk` query flags, both set to True"""
    return {'contentCheckOk': True, 'racyCheckOk': True}

2972

2973

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player API query.

    @param sts  Signature timestamp; included as `signatureTimestamp`
                only when it is not None

    The returned dict also contains the flags from `_get_checkok_params`.
    """
    context = {
        'html5Preference': 'HTML5_PREF_WANTS',
    }
    if sts is not None:
        context['signatureTimestamp'] = sts
    return {
        'playbackContext': {
            'contentPlaybackContext': context
        },
        **cls._get_checkok_params()
    }

@staticmethod
def _is_agegated(player_response):
    """Return True if the player response looks age-gated.

    A truthy `playabilityStatus.desktopLegacyAgeGateReason` is sufficient.
    Otherwise the `status` and `reason` of `playabilityStatus` are checked
    for any of the known age-gate substrings.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    # NOTE(review): the substring test below is case-sensitive and all entries
    # are lowercase - confirm this matches the casing of real status values
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)

2998

2999

@staticmethod
def _is_unplayable(player_response):
    """Return True if `playabilityStatus.status` of the player response is 'UNPLAYABLE'"""
    return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'

3002

3003

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the player API response of `video_id` for the given client.

    @param client        Name of the innertube client to query as
    @param master_ytcfg  Fallback ytcfg for session index, sync id and signature timestamp
    @param player_ytcfg  Client-specific ytcfg used for headers and the request
    @param player_url    Player JS URL; the signature timestamp is only
                         extracted when this is truthy
    @param initial_pr    Initial player response, used when extracting the account sync id

    Returns the response, or None if it is falsy. The request is made
    with fatal=True.
    """

    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB'  # enable stories
    }
    yt_query.update(self._generate_player_context(sts))
    return self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
    ) or None

3022

3023

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the `player_client` extractor argument into an ordered, de-duplicated client list.

    Accepted values are the names of clients in INNERTUBE_CLIENTS that do
    not start with '_', plus the keywords 'default' and 'all'. Anything
    else is skipped with a warning. When nothing valid was requested, the
    default clients are used. For music URLs, the `<client>_music` variant
    of every selected client is appended if it exists in INNERTUBE_CLIENTS.
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that later extension does not modify `default`
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Materialize first: extending a list from a generator that iterates
        # the same list would also visit the newly appended items
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3046

3047

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for all requested clients.

    @param clients       Ordered list of client names to try
    @param webpage       Watch page HTML (may be falsy); the initial
                         player response is searched for in it
    @param master_ytcfg  ytcfg used for the 'web' client and as fallback

    Returns a tuple `(player_responses, player_url)`. Extraction errors of
    individual clients are reported as warnings; the last error is raised
    only if no player response could be obtained at all.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that clients.pop() processes them in the requested order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # Fall back to downloading the player URL at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3121

3122

def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Generator yielding format dicts from the given streaming data.

    @param streaming_data  List of `streamingData` dicts of the player responses
    @param player_url      Player JS URL, needed to decrypt signatures and n-sig
    @param is_live         Whether the video is currently live
    @param duration        Video duration; used to detect possibly damaged formats

    Direct (https) formats are yielded first, followed by formats from the
    HLS and DASH manifests unless those are skipped by configuration.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            # Remembered so that manifest formats can be assigned a quality later
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Copied so that the appends below cannot modify a list owned by the configuration
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format_id and quality to a manifest format; return False for duplicates"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # The itag is already used by another protocol; disambiguate
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Prefer the quality known for the itag; fall back to the one known for the height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Generator yielding one 'mhtml' storyboard format per storyboard level.

    The spec string is split on '|': the first field is the base URL and
    each following field describes one level as 8 '#'-separated values.
    Levels that do not have exactly 8 values, or whose first five values
    are not non-zero integers, are skipped with a warning.
    """
    # Reversed so that pop() removes the leading base URL field
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # `spec` is reversed, so L - i restores the level index of the original order
        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        fragment_count = frame_count / (cols * rows)
        # NOTE(review): this division fails if `duration` is None - confirm
        # that callers always pass a number
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be shorter than the others
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3332

3333

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    The webpage download is skipped when 'webpage' is listed in the
    `player_skip` extractor argument, and is non-fatal otherwise. When no
    ytcfg can be extracted from the webpage, the default ytcfg is used.

    Returns a tuple `(webpage, master_ytcfg, player_responses, player_url)`.
    """
    webpage = None
    if 'webpage' not in self._configuration_arg('player_skip'):
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    player_responses, player_url = self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3346

3347

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live state and extract all formats from the player responses.

    `is_live` is taken from the video details and, when that is None, from
    `isLiveNow` of the live broadcast details.

    Returns a tuple `(live_broadcast_details, is_live, streaming_data, formats)`.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # _extract_formats is a generator; materialize it here
    formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live, duration))

    return live_broadcast_details, is_live, streaming_data, formats

3357

3358

def _real_extract(self, url):

3359

url, smuggled_data = unsmuggle_url(url, {})

3360

video_id = self._match_id(url)

3361

3362

base_url = self.http_scheme() + '//www.youtube.com/'

3363

webpage_url = base_url + 'watch?v=' + video_id

3364

3365

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3366

3367

playability_statuses = traverse_obj(

3368

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3369

3370

trailer_video_id = get_first(

3371

playability_statuses,

3372

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3373

expected_type=str)

3374

if trailer_video_id:

3375

return self.url_result(

3376

trailer_video_id, self.ie_key(), trailer_video_id)

3377

3378

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3379

if webpage else (lambda x: None))

3380

3381

video_details = traverse_obj(

3382

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3383

microformats = traverse_obj(

3384

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3385

expected_type=dict, default=[])

3386

video_title = (

3387

get_first(video_details, 'title')

3388

or self._get_text(microformats, (..., 'title'))

3389

or search_meta(['og:title', 'twitter:title', 'title']))

3390

video_description = get_first(video_details, 'shortDescription')

3391

3392

multifeed_metadata_list = get_first(

3393

player_responses,

3394

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3395

expected_type=str)

3396

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3397

if self.get_param('noplaylist'):

3398

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3403

# Unquote should take place before split on comma (,) since textual

3404

# fields may contain comma as well (see

3405

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3406

feed_data = compat_parse_qs(

3407

compat_urllib_parse_unquote_plus(feed))

3408

3409

def feed_entry(name):

3410

return try_get(

3411

feed_data, lambda x: x[name][0], compat_str)

3412

3413

feed_id = feed_entry('id')

3414

if not feed_id:

3415

continue

3416

feed_title = feed_entry('title')

3417

title = video_title

3418

if feed_title:

3419

title += ' (%s)' % feed_title

3420

entries.append({

3421

'_type': 'url_transparent',

3422

'ie_key': 'Youtube',

3423

'url': smuggle_url(

3424

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3425

{'force_singlefeed': True}),

3426

'title': title,

3427

})

3428

feed_ids.append(feed_id)

3429

self.to_screen(

3430

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3431

% (', '.join(feed_ids), video_id))

3432

return self.playlist_result(

3433

entries, video_id, video_title, video_description)

3434

3435

duration = int_or_none(

3436

get_first(video_details, 'lengthSeconds')

3437

or get_first(microformats, 'lengthSeconds')

3438

or parse_duration(search_meta('duration'))) or None

3439

3440

if get_first(video_details, 'isPostLiveDvr'):

3441

self.write_debug('Video is in Post-Live Manifestless mode')

3442

if duration or 0 > 4 * 3600:

3443

self.report_warning(

3444

'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '

3445

'This is a known issue and patches are welcome')

3446

3447

live_broadcast_details, is_live, streaming_data, formats = self._list_formats(

3448

video_id, microformats, video_details, player_responses, player_url, duration)

3449

3450

if not formats:

3451

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3452

self.report_drm(video_id)

3453

pemr = get_first(

3454

playability_statuses,

3455

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3456

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3457

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3458

if subreason:

3459

if subreason == 'The uploader has not made this video available in your country.':

3460

countries = get_first(microformats, 'availableCountries')

3461

if not countries:

3462

regions_allowed = search_meta('regionsAllowed')

3463

countries = regions_allowed.split(',') if regions_allowed else None

3464

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3465

reason += f'. {subreason}'

3466

if reason:

3467

self.raise_no_formats(reason, expected=True)

3468

3469

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3470

if not keywords and webpage:

3471

keywords = [

3472

unescapeHTML(m.group('content'))

3473

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3474

for keyword in keywords:

3475

if keyword.startswith('yt:stretch='):

3476

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3477

if mobj:

3478

# NB: float is intentional for forcing float division

3479

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3484

f['stretched_ratio'] = ratio

3485

break

3486

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3487

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3488

if thumbnail_url:

3489

thumbnails.append({

3490

'url': thumbnail_url,

3491

})

3492

original_thumbnails = thumbnails.copy()

3493

3494

# The best resolution thumbnails sometimes does not appear in the webpage

3495

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3496

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3497

thumbnail_names = [

3498

# While the *1,*2,*3 thumbnails are just below their correspnding "*default" variants

3499

# in resolution, these are not the custom thumbnail. So de-prioritize them

3500

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3501

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3502

]

3503

n_thumbnail_names = len(thumbnail_names)

3504

thumbnails.extend({

3505

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3506

video_id=video_id, name=name, ext=ext,

3507

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3508

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3509

for thumb in thumbnails:

3510

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3511

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3512

self._remove_duplicate_formats(thumbnails)

3513

self._downloader._sort_thumbnails(original_thumbnails)

3514

3515

category = get_first(microformats, 'category') or search_meta('genre')

3516

channel_id = str_or_none(

3517

get_first(video_details, 'channelId')

3518

or get_first(microformats, 'externalChannelId')

3519

or search_meta('channelId'))

3520

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3521

3522

live_content = get_first(video_details, 'isLiveContent')

3523

is_upcoming = get_first(video_details, 'isUpcoming')

3524

if is_live is None:

3525

if is_upcoming or live_content is False:

3526

is_live = False

3527

if is_upcoming is None and (live_content or is_live):

3528

is_upcoming = False

3529

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3530

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3531

if not duration and live_end_time and live_start_time:

3532

duration = live_end_time - live_start_time

3533

3534

if is_live and self.get_param('live_from_start'):

3535

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3536

3537

formats.extend(self._extract_storyboard(player_responses, duration))

3538

3539

# Source is given priority since formats that throttle are given lower source_preference

3540

# When throttling issue is fully fixed, remove this

3541

self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3546

'formats': formats,

3547

'thumbnails': thumbnails,

3548

# The best thumbnail that we are sure exists. Prevents unnecessary

3549

# URL checking if user don't care about getting the best possible thumbnail

3550

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3551

'description': video_description,

3552

'uploader': get_first(video_details, 'author'),

3553

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3554

'uploader_url': owner_profile_url,

3555

'channel_id': channel_id,

3556

'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),

3557

'duration': duration,

3558

'view_count': int_or_none(

3559

get_first((video_details, microformats), (..., 'viewCount'))

3560

or search_meta('interactionCount')),

3561

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3562

'age_limit': 18 if (

3563

get_first(microformats, 'isFamilySafe') is False

3564

or search_meta('isFamilyFriendly') == 'false'

3565

or search_meta('og:restrictions:age') == '18+') else 0,

3566

'webpage_url': webpage_url,

3567

'categories': [category] if category else None,

3568

'tags': keywords,

3569

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3570

'is_live': is_live,

3571

'was_live': (False if is_live or is_upcoming or live_content is False

3572

else None if is_live is None or is_upcoming is None

3573

else live_content),

3574

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3575

'release_timestamp': live_start_time,

3576

}

3577

3578

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3579

if pctr:

3580

def get_lang_code(track):

3581

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3582

or track.get('languageCode'))

3583

3584

# Converted into dicts to remove duplicates

3585

captions = {

3586

get_lang_code(sub): sub

3587

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3588

translation_languages = {

3589

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3590

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3591

3592

def process_language(container, base_url, lang_code, sub_name, query):

3593

lang_subs = container.setdefault(lang_code, [])

3594

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

subtitles, automatic_captions = {}, {}

3605

for lang_code, caption_track in captions.items():

3606

base_url = caption_track.get('baseUrl')

3607

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3608

if not base_url:

3609

continue

3610

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3611

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3616

if not caption_track.get('isTranslatable'):

3617

continue

3618

for trans_code, trans_name in translation_languages.items():

3619

if not trans_code:

3620

continue

3621

orig_trans_code = trans_code

3622

if caption_track.get('kind') != 'asr':

3623

if 'translated_subs' in self._configuration_arg('skip'):

3624

continue

3625

trans_code += f'-{lang_code}'

3626

trans_name += format_field(lang_name, template=' from %s')

3627

# Add an "-orig" label to the original language so that it can be distinguished.

3628

# The subs are returned without "-orig" as well for compatibility

3629

if lang_code == f'a-{orig_trans_code}':

3630

process_language(

3631

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3632

# Setting tlang=lang returns damaged subtitles.

3633

process_language(automatic_captions, base_url, trans_code, trans_name,

3634

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3635

info['automatic_captions'] = automatic_captions

3636

info['subtitles'] = subtitles

3637

3638

parsed_url = compat_urllib_parse_urlparse(url)

3639

for component in [parsed_url.fragment, parsed_url.query]:

3640

query = compat_parse_qs(component)

3641

for k, v in query.items():

3642

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3643

d_k += '_time'

3644

if d_k not in info and k in s_ks:

3645

info[d_k] = parse_duration(query[k][0])

3646

3647

# Youtube Music Auto-generated description

3648

if video_description:

3649

mobj = re.search(

3650

r'''(?xs)

3651

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

3652

(?P<album>[^\n]+)

3653

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

3654

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

3655

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

3656

.+\nAuto-generated\ by\ YouTube\.\s*$

3657

''', video_description)

3658

if mobj:

3659

release_year = mobj.group('release_year')

3660

release_date = mobj.group('release_date')

3661

if release_date:

3662

release_date = release_date.replace('-', '')

3663

if not release_year:

3664

release_year = release_date[:4]

3665

info.update({

3666

'album': mobj.group('album'.strip()),

3667

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3668

'track': mobj.group('track').strip(),

3669

'release_date': release_date,

3670

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self._search_json(

3676

self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', video_id, fatal=False)

3677

if not initial_data:

3678

query = {'videoId': video_id}

3679

query.update(self._get_checkok_params())

3680

initial_data = self._extract_response(

3681

item_id=video_id, ep='next', fatal=False,

3682

ytcfg=master_ytcfg, query=query,

3683

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3684

note='Downloading initial data API JSON')

3685

3686

try: # This will error if there is no livechat

3687

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3688

except (KeyError, IndexError, TypeError):

3689

pass

3690

else:

3691

info.setdefault('subtitles', {})['live_chat'] = [{

3692

'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies

3693

'video_id': video_id,

3694

'ext': 'json',

3695

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3701

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3702

or self._extract_chapters_from_description(video_description, duration)

3703

or None)

3704

3705

contents = traverse_obj(

3706

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3707

expected_type=list, default=[])

3708

3709

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3710

if vpir:

3711

stl = vpir.get('superTitleLink')

3712

if stl:

3713

stl = self._get_text(stl)

3714

if try_get(

3715

vpir,

3716

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3717

info['location'] = stl

3718

else:

3719

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

3720

if mobj:

3721

info.update({

3722

'series': mobj.group(1),

3723

'season_number': int(mobj.group(2)),

3724

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3729

list) or []):

3730

tbr = tlb.get('toggleButtonRenderer') or {}

3731

for getter, regex in [(

3732

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3733

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3734

lambda x: x['accessibility'],

3735

lambda x: x['accessibilityData']['accessibilityData'],

3736

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3737

label = (try_get(tbr, getter, dict) or {}).get('label')

3738

if label:

3739

mobj = re.match(regex, label)

3740

if mobj:

3741

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3742

break

3743

sbr_tooltip = try_get(

3744

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3745

if sbr_tooltip:

3746

like_count, dislike_count = sbr_tooltip.split(' / ')

3747

info.update({

3748

'like_count': str_to_int(like_count),

3749

'dislike_count': str_to_int(dislike_count),

3750

})

3751

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3752

if vsir:

3753

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3754

info.update({

3755

'channel': self._get_text(vor, 'title'),

3756

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3761

list) or []

3762

multiple_songs = False

3763

for row in rows:

3764

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3765

multiple_songs = True

3766

break

3767

for row in rows:

3768

mrr = row.get('metadataRowRenderer') or {}

3769

mrr_title = mrr.get('title')

3770

if not mrr_title:

3771

continue

3772

mrr_title = self._get_text(mrr, 'title')

3773

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3774

if mrr_title == 'License':

3775

info['license'] = mrr_contents_text

3776

elif not multiple_songs:

3777

if mrr_title == 'Album':

3778

info['album'] = mrr_contents_text

3779

elif mrr_title == 'Artist':

3780

info['artist'] = mrr_contents_text

3781

elif mrr_title == 'Song':

3782

info['track'] = mrr_contents_text

3783

3784

fallbacks = {

3785

'channel': 'uploader',

3786

'channel_id': 'uploader_id',

3787

'channel_url': 'uploader_url',

3788

}

3789

3790

# The upload date for scheduled, live and past live streams / premieres in microformats

3791

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3792

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3793

upload_date = (

3794

unified_strdate(get_first(microformats, 'uploadDate'))

3795

or unified_strdate(search_meta('uploadDate')))

3796

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3797

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date

3798

info['upload_date'] = upload_date

3799

3800

for to, frm in fallbacks.items():

3801

if not info.get(to):

3802

info[to] = info.get(frm)

3803

3804

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3810

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3811

is_membersonly = None

3812

is_premium = None

3813

if initial_data and is_private is not None:

3814

is_membersonly = False

3815

is_premium = False

3816

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3817

badge_labels = set()

3818

for content in contents:

3819

if not isinstance(content, dict):

3820

continue

3821

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3822

for badge_label in badge_labels:

3823

if badge_label.lower() == 'members only':

3824

is_membersonly = True

3825

elif badge_label.lower() == 'premium':

3826

is_premium = True

3827

elif badge_label.lower() == 'unlisted':

3828

is_unlisted = True

3829

3830

info['availability'] = self._availability(

3831

is_private=is_private,

3832

needs_premium=is_premium,

3833

needs_subscription=is_membersonly,

3834

needs_auth=info['age_limit'] >= 18,

3835

is_unlisted=None if is_private is None else is_unlisted)

3836

3837

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3838

3839

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3845

3846

@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that forwards smuggled URL data to ``func`` and to its entries.

    The returned wrapper takes ``(self, url)``, unsmuggles the URL and calls
    ``func(self, url, smuggled_data)``. When any smuggled data is present and
    the result has entries, each entry URL is re-smuggled with that data.
    """

    def _resmuggle(items, payload):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in items:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        entries = result.get('entries')
        if entries:
            # Entries are wrapped lazily; nothing is consumed here
            result['entries'] = _resmuggle(entries, smuggled_data)
        return result

    return wrapper

def _extract_channel_id(self, webpage):
    """Return the channel ID found in a channel webpage.

    The ``channelId`` meta tag is tried first. If it is missing, the ID is
    parsed from the channel URL given by one of the canonical-URL meta tags
    (``og:url``, ``al:*:url``, ``twitter:*``).
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The repetition must be inside the capture group; with `([^/?#&])+`
    # the group would hold only the final character of the ID.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')

3878

3879

@staticmethod
def _extract_basic_item_renderer(item):
    """Return the first usable renderer dict contained in ``item``.

    A value qualifies when it is a dict and its key is either one of the
    known basic renderer names or has the form ``grid...Renderer``.
    Returns None when nothing qualifies.
    """
    # Modified from _extract_grid_item_renderer
    basic_renderer_keys = (
        'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
    )
    for name, candidate in item.items():
        if not isinstance(candidate, dict):
            continue
        if name in basic_renderer_keys or (name.startswith('grid') and name.endswith('Renderer')):
            return candidate
    return None

3892

3893

def _grid_entries(self, grid_renderer):
    """Yield one result per recognised item in ``grid_renderer['items']``.

    Each item is classified, in order, as a playlist, a video, a channel, or
    a generic endpoint URL. Items that match none of these are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        if renderer.get('playlistId'):
            # playlist
            playlist_id = renderer['playlistId']
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % renderer['channelId'],
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)), None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url), video_title=title)

3932

3933

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for a music list item.

    Preference order: a direct video ID from ``playlistItemData``, then a
    playlist (optionally with a starting video) from the watch endpoint,
    then a browse ID. Returns None when none of them is present.
    """
    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')

    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={item_video_id}',
                               ie=YoutubeIE.ie_key(), video_id=item_video_id)

    playlist_id = traverse_obj(renderer, watch_endpoint + ('playlistId',))
    if playlist_id:
        video_id = traverse_obj(renderer, watch_endpoint + ('videoId',))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)

3950

3951

def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the grid entries embedded in a shelf renderer's ``content``.

    Only ``gridRenderer`` and ``expandedShelfContentsRenderer`` are handled.
    Nothing is yielded when ``content`` is not a dict.
    """
    content = shelf_renderer.get('content')
    if not isinstance(content, dict):
        return
    grid = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
    if grid:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        yield from self._grid_entries(grid)
    # TODO: content['horizontalListRenderer'] is not handled yet

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf endpoint, then the shelf's own content.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3981

3982

def _playlist_entries(self, video_list_renderer):
    """Yield ``self._extract_video(...)`` for each playlist video with an ID.

    Items are read from ``video_list_renderer['contents']``. Both
    ``playlistVideoRenderer`` and ``playlistPanelVideoRenderer`` are accepted.
    """
    for content in video_list_renderer['contents']:
        if not isinstance(content, dict):
            continue
        renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
        if isinstance(renderer, dict) and renderer.get('videoId'):
            yield self._extract_video(renderer)

3993

3994

def _rich_entries(self, rich_grid_renderer):
    """Yield the video held at ``content.videoRenderer``, if it has a video ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4001

4002

def _video_entry(self, video_renderer):
    """Return ``self._extract_video(video_renderer)``, or None without a video ID."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)

4006

4007

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for a hashtag tile, or None when it has no usable URL."""
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_path)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4013

4014

def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a community post.

    Sources, in order: the attached video, the attached playlist, then every
    video URL linked inline in the post text (excluding the attached video).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # Do not repeat the video that is already attached to the post
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)

4049

4050

def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield entries from a continuation page of community posts.

    Each item of ``post_thread_continuation['contents']`` is handled either
    as a ``backstagePostThreadRenderer`` (via ``_post_thread_entries``) or as
    a bare ``videoRenderer`` (via ``_video_entry``). Non-dict items are
    skipped, and videos without an ID produce no entry.
    """
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        # Same guard as the sibling entry generators: ignore malformed items
        if not isinstance(content, dict):
            continue
        renderer = content.get('backstagePostThreadRenderer')
        if isinstance(renderer, dict):
            yield from self._post_thread_entries(renderer)
            continue
        renderer = content.get('videoRenderer')
        if isinstance(renderer, dict):
            # _video_entry returns None when there is no videoId; never yield that
            entry = self._video_entry(renderer)
            if entry:
                yield entry

4062

4063

r''' # unused

4064

def _rich_grid_entries(self, contents):

4065

for content in contents:

4066

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4067

if video_renderer:

4068

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    ``continuation_list`` is an output parameter: it is reset to ``[None]``
    and its single element is overwritten with the continuation extracted
    from the handled renderer, falling back to the enclosing section and
    finally to ``parent_renderer``. It is only meaningful once this
    generator has been fully consumed.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            for key, renderer in section_item.items():
                handler = section_handlers.get(key)
                if handler is None:
                    continue
                # Handlers may produce None for unusable items; drop those
                yield from (entry for entry in handler(renderer) if entry)
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of each item is processed
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4121

4122

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of a tab, following continuations page by page.

    The tab's initial content is processed first. After that, each
    continuation response is dispatched either on its ``continuationContents``
    or on the first item of its ``appendContinuationItemsAction``. Paging
    stops when there is no continuation, no response, or nothing recognised.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Uses whichever list `continuation_list` is bound to when called,
        # so rebinding it below redirects where the token is stored
        return self._extract_entries(renderer, continuation_list)

    # Handlers for responses shaped as {'continuationContents': {key: renderer}}
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for appended continuation items: (handler, key the items are wrapped under)
    item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        contents_key = next((key for key in continuation_contents if key in continuation_handlers), None)
        if contents_key is not None:
            continuation_renderer = continuation_contents[contents_key]
            continuation_list = [None]
            yield from continuation_handlers[contents_key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            # A falsy renderer falls through to the appended-items handling below
            if continuation_renderer:
                continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        item_key = next((key for key in first_item if key in item_handlers), None)
        if item_key is None:
            break
        handler, container_key = item_handlers[item_key]
        # The whole item list is re-wrapped under the key the handler expects
        video_items_renderer = {container_key: continuation_items}
        continuation_list = [None]
        yield from handler(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the renderer of the tab whose ``selected`` flag is exactly True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` are considered. When
    no tab is selected, ExtractorError is raised if ``fatal`` is set;
    otherwise None is returned.
    """
    renderers = (dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {} for tab in tabs)
    selected = next((renderer for renderer in renderers if renderer.get('selected') is True), None)
    if selected is not None:
        return selected
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None

4206

4207

def _extract_uploader(self, data):
    """Return uploader fields taken from the playlist sidebar's video owner.

    The result may contain ``uploader``, ``uploader_id`` and ``uploader_url``;
    fields whose value is None are left out. An empty dict is returned when
    no owner is found.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # Collaborative playlists read "by NAME and N others"; keep only NAME
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {name: value for name, value in fields.items() if value is not None}

4222

4223

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for a tab-based page.

    Metadata comes from ``channelMetadataRenderer`` when present, otherwise
    from ``playlistMetadataRenderer``. Entries come from the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name, channel_url, channel_id = (
            renderer.get(key) for key in ('title', 'channelUrl', 'externalId'))
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    # Added after the loop so the uncropped banner keeps its own preference
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    title += ''.join(
        format_field(selected_tab, field, ' - %s') for field in ('title', 'expandedText'))

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror whatever uploader fields were finally chosen
    for suffix in ('', '_id', '_url'):
        metadata['channel' + suffix] = metadata['uploader' + suffix]

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4314

4315

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline (watch-page) playlist, page by page.

    After each page, the next one is requested from the ``next`` endpoint
    using the last video's watch endpoint. Videos up to and including the
    last one already yielded are skipped on the following page. Iteration
    ends when a page has no playlist, no videos, or nothing new.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        # A response without the expected playlist panel ends the listing
        # instead of crashing inside _playlist_entries
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The endpoint may be absent; fall back to the defaults built below
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

4345

4346

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist renderer found in a response.

    Returns a url_result pointing at the playlist's own page when the renderer
    links to a different URL that is not known to be unviewable; otherwise
    returns a playlist_result backed by the inline playlist entries.
    """
    playlist_title = playlist.get('title')
    if not playlist_title:
        playlist_title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    linked_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], compat_str)
    playlist_url = urljoin(url, linked_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not known_unviewable
    if not should_delegate:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=playlist_title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=playlist_title)

4368

4369

def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its sidebar.

    Privacy is read from the sidebar badges and, if present, from the selected
    entry of the privacy dropdown. Nothing is assumed to be public unless a
    'public' label says so explicitly.
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            labels.add(selected_label.lower())
            break

    private = unlisted = None
    for label in labels:
        if label == 'public':
            private = unlisted = False
        elif label == 'private':
            private = True
        elif label == 'unlisted':
            unlisted = True
    return self._availability(private, False, False, False, unlisted)

4400

4401

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value of type `expected_type`
    among the playlist sidebar items of `data`, or None if there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    # Lazily evaluated, so lookup stops at the first usable renderer
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Re-request the playlist so that unavailable videos are included.

    Uses the browse endpoint of the sidebar's 'show unavailable videos' menu
    item when it exists, and default query values otherwise. Returns None
    when the primary sidebar renderer is missing.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    endpoint = {}
    menu_entries = try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_entries:
        if not isinstance(entry, dict):
            continue
        nav_renderer = entry.get('menuNavigationItemRenderer')
        label = try_get(nav_renderer, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    # Fall back to default values when the menu item (or one of its fields) is absent
    query = {
        'params': endpoint.get('params') or 'wgYCCAA=',
        'browseId': endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4445

4446

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the `skip` extractor argument of YoutubeTabIE"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps

4449

4450

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, with retries.

    A new attempt is made (up to the `extractor_retries` param) after a network
    error other than HTTP 403/429, or when the initial data has none of the
    expected top-level keys. When `fatal` is false, errors are reported as
    warnings instead of being raised.
    @returns (webpage, data) from the last attempt
    """
    max_retries = self.get_param('extractor_retries', 3)
    attempt = -1
    webpage = data = pending_error = None
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if pending_error:
            self.report_warning('%s. Retrying ...' % pending_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            webpage = self._download_webpage(
                url, item_id, note='Downloading webpage%s' % (retry_suffix,))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Network errors are retried, except HTTP 403/429 responses
            retryable = isinstance(cause, network_exceptions) and not (
                isinstance(cause, compat_HTTPError) and cause.code in (403, 429))
            if retryable:
                pending_error = error_to_compat_str(cause or e.msg)
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Download and extraction succeeded; surface any alerts in the data
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
            break

        pending_error = 'Incomplete yt initial data received'
        if attempt < max_retries:
            continue
        if fatal:
            raise ExtractorError(pending_error)
        self.report_warning(pending_error)
        break

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage.

    Does nothing when `ytcfg` is available or the session is not authenticated.
    Otherwise raises if `fatal` and 'authcheck' is not among the skipped checks,
    and reports a one-time warning in the remaining cases.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped_checks = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if 'authcheck' not in skipped_checks and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

4507

4508

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data and ytcfg for `url`.

    The webpage is tried first unless it is configured to be skipped; when it
    yields no data, the data is requested through the API instead.
    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    # No usable webpage data: fall back to resolving the URL through the API
    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4527

4528

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the API and fetch the response of the endpoint it maps to.

    A browse endpoint is preferred over a watch endpoint. If neither is
    resolved, raises when `fatal`, otherwise reports a warning and returns None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query), in order of preference
    for endpoint_key, api_endpoint in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_endpoint, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)

4545

4546

# Value used for the search query's 'params' field when _search_results is called
# without an explicit `params`; a falsy value means no 'params' field is sent
_SEARCH_PARAMS = None

4547

4548

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, following continuations page by page.

    @param query: search query string
    @param params: value for the request's 'params' field; defaults to
                   self._SEARCH_PARAMS and is omitted when falsy
    @param default_client: client passed on to the ytcfg download and API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Unique top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-slot list that _extract_entries fills with the next continuation
    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        if continuation_list[0]:
            data.update(continuation_list[0])
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        page_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4583

IE_DESC = 'YouTube Tabs'

4584

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4593

(?P<not_channel>

4594

feed/|hashtag/|

4595

(?:playlist|watch)\?.*?\blist=

4596

)|

4597

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4602

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4603

}

4604

IE_NAME = 'youtube:tab'

4605

4606

_TESTS = [{

4607

'note': 'playlists, multipage',

4608

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4609

'playlist_mincount': 94,

4610

'info_dict': {

4611

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4612

'title': 'Igor Kleiner - Playlists',

4613

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4614

'uploader': 'Igor Kleiner',

4615

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4616

'channel': 'Igor Kleiner',

4617

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4618

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4619

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4620

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4621

'channel_follower_count': int

4622

},

4623

}, {

4624

'note': 'playlists, multipage, different order',

4625

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4626

'playlist_mincount': 94,

4627

'info_dict': {

4628

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4629

'title': 'Igor Kleiner - Playlists',

4630

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4631

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4632

'uploader': 'Igor Kleiner',

4633

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4634

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4635

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4636

'channel': 'Igor Kleiner',

4637

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4638

'channel_follower_count': int

4639

},

4640

}, {

4641

'note': 'playlists, series',

4642

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4643

'playlist_mincount': 5,

4644

'info_dict': {

4645

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4646

'title': '3Blue1Brown - Playlists',

4647

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4648

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4649

'uploader': '3Blue1Brown',

4650

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4651

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4652

'channel': '3Blue1Brown',

4653

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4654

'tags': ['Mathematics'],

4655

'channel_follower_count': int

4656

},

4657

}, {

4658

'note': 'playlists, singlepage',

4659

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4660

'playlist_mincount': 4,

4661

'info_dict': {

4662

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4663

'title': 'ThirstForScience - Playlists',

4664

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4665

'uploader': 'ThirstForScience',

4666

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4667

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4668

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4669

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4670

'tags': 'count:13',

4671

'channel': 'ThirstForScience',

4672

'channel_follower_count': int

4673

}

4674

}, {

4675

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4676

'only_matching': True,

4677

}, {

4678

'note': 'basic, single video playlist',

4679

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4680

'info_dict': {

4681

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4682

'uploader': 'Sergey M.',

4683

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4684

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4689

'channel': 'Sergey M.',

4690

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4691

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4692

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4697

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4698

'info_dict': {

4699

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4700

'uploader': 'Sergey M.',

4701

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4702

'title': 'youtube-dl empty playlist',

4703

'tags': [],

4704

'channel': 'Sergey M.',

4705

'description': '',

4706

'modified_date': '20160902',

4707

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4708

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4709

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4715

'info_dict': {

4716

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4717

'title': 'lex will - Home',

4718

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4719

'uploader': 'lex will',

4720

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4721

'channel': 'lex will',

4722

'tags': ['bible', 'history', 'prophesy'],

4723

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4724

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4725

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4726

'channel_follower_count': int

4727

},

4728

'playlist_mincount': 2,

4729

}, {

4730

'note': 'Videos tab',

4731

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4732

'info_dict': {

4733

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4734

'title': 'lex will - Videos',

4735

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4736

'uploader': 'lex will',

4737

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4738

'tags': ['bible', 'history', 'prophesy'],

4739

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4740

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4741

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4742

'channel': 'lex will',

4743

'channel_follower_count': int

4744

},

4745

'playlist_mincount': 975,

4746

}, {

4747

'note': 'Videos tab, sorted by popular',

4748

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4749

'info_dict': {

4750

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4751

'title': 'lex will - Videos',

4752

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4753

'uploader': 'lex will',

4754

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4755

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4756

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4757

'channel': 'lex will',

4758

'tags': ['bible', 'history', 'prophesy'],

4759

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4760

'channel_follower_count': int

4761

},

4762

'playlist_mincount': 199,

4763

}, {

4764

'note': 'Playlists tab',

4765

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4766

'info_dict': {

4767

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4768

'title': 'lex will - Playlists',

4769

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4770

'uploader': 'lex will',

4771

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4772

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4773

'channel': 'lex will',

4774

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4775

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4776

'tags': ['bible', 'history', 'prophesy'],

4777

'channel_follower_count': int

4778

},

4779

'playlist_mincount': 17,

4780

}, {

4781

'note': 'Community tab',

4782

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4783

'info_dict': {

4784

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4785

'title': 'lex will - Community',

4786

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4787

'uploader': 'lex will',

4788

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4789

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4790

'channel': 'lex will',

4791

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4792

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4793

'tags': ['bible', 'history', 'prophesy'],

4794

'channel_follower_count': int

4795

},

4796

'playlist_mincount': 18,

4797

}, {

4798

'note': 'Channels tab',

4799

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4800

'info_dict': {

4801

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4802

'title': 'lex will - Channels',

4803

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4804

'uploader': 'lex will',

4805

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4806

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4807

'channel': 'lex will',

4808

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4809

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4810

'tags': ['bible', 'history', 'prophesy'],

4811

'channel_follower_count': int

4812

},

4813

'playlist_mincount': 12,

4814

}, {

4815

'note': 'Search tab',

4816

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4817

'playlist_mincount': 40,

4818

'info_dict': {

4819

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4820

'title': '3Blue1Brown - Search - linear algebra',

4821

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4822

'uploader': '3Blue1Brown',

4823

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4824

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4825

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4826

'tags': ['Mathematics'],

4827

'channel': '3Blue1Brown',

4828

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4829

'channel_follower_count': int

4830

},

4831

}, {

4832

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4833

'only_matching': True,

4834

}, {

4835

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4836

'only_matching': True,

4837

}, {

4838

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4839

'only_matching': True,

4840

}, {

4841

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4842

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4843

'info_dict': {

4844

'title': '29C3: Not my department',

4845

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4846

'uploader': 'Christiaan008',

4847

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4848

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4849

'tags': [],

4850

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4851

'view_count': int,

4852

'modified_date': '20150605',

4853

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4854

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4855

'channel': 'Christiaan008',

4856

},

4857

'playlist_count': 96,

4858

}, {

4859

'note': 'Large playlist',

4860

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4861

'info_dict': {

4862

'title': 'Uploads from Cauchemar',

4863

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4864

'uploader': 'Cauchemar',

4865

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4866

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4867

'tags': [],

4868

'modified_date': r're:\d{8}',

4869

'channel': 'Cauchemar',

4870

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4871

'view_count': int,

4872

'description': '',

4873

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4874

},

4875

'playlist_mincount': 1123,

4876

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4877

}, {

4878

'note': 'even larger playlist, 8832 videos',

4879

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4880

'only_matching': True,

4881

}, {

4882

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4883

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4884

'info_dict': {

4885

'title': 'Uploads from Interstellar Movie',

4886

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4887

'uploader': 'Interstellar Movie',

4888

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4889

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4890

'tags': [],

4891

'view_count': int,

4892

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4893

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4894

'channel': 'Interstellar Movie',

4895

'description': '',

4896

'modified_date': r're:\d{8}',

4897

},

4898

'playlist_mincount': 21,

4899

}, {

4900

'note': 'Playlist with "show unavailable videos" button',

4901

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4902

'info_dict': {

4903

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4904

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4905

'uploader': 'Phim Siêu Nhân Nhật Bản',

4906

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4907

'view_count': int,

4908

'channel': 'Phim Siêu Nhân Nhật Bản',

4909

'tags': [],

4910

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4911

'description': '',

4912

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4913

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4914

'modified_date': r're:\d{8}',

4915

},

4916

'playlist_mincount': 200,

4917

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4918

}, {

4919

'note': 'Playlist with unavailable videos in page 7',

4920

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4921

'info_dict': {

4922

'title': 'Uploads from BlankTV',

4923

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4924

'uploader': 'BlankTV',

4925

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4926

'channel': 'BlankTV',

4927

'channel_url': 'https://www.youtube.com/c/blanktv',

4928

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4929

'view_count': int,

4930

'tags': [],

4931

'uploader_url': 'https://www.youtube.com/c/blanktv',

4932

'modified_date': r're:\d{8}',

4933

'description': '',

4934

},

4935

'playlist_mincount': 1000,

4936

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4937

}, {

4938

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4939

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4940

'info_dict': {

4941

'title': 'Data Analysis with Dr Mike Pound',

4942

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4943

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4944

'uploader': 'Computerphile',

4945

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4946

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4947

'tags': [],

4948

'view_count': int,

4949

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4950

'channel_url': 'https://www.youtube.com/user/Computerphile',

4951

'channel': 'Computerphile',

4952

},

4953

'playlist_mincount': 11,

4954

}, {

4955

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4956

'only_matching': True,

4957

}, {

4958

'note': 'Playlist URL that does not actually serve a playlist',

4959

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4964

'uploader': 'STREEM',

4965

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4966

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4967

'upload_date': '20150526',

4968

'license': 'Standard YouTube License',

4969

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4970

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4977

},

4978

'skip': 'This video is not available.',

4979

'add_ie': [YoutubeIE.ie_key()],

4980

}, {

4981

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4982

'only_matching': True,

4983

}, {

4984

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

4985

'only_matching': True,

4986

}, {

4987

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

4988

'info_dict': {

4989

'id': 'GgL890LIznQ', # This will keep changing

4990

'ext': 'mp4',

4991

'title': str,

4992

'uploader': 'Sky News',

4993

'uploader_id': 'skynews',

4994

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

4995

'upload_date': r're:\d{8}',

4996

'description': str,

4997

'categories': ['News & Politics'],

4998

'tags': list,

4999

'like_count': int,

5000

'release_timestamp': 1642502819,

5001

'channel': 'Sky News',

5002

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5003

'age_limit': 0,

5004

'view_count': int,

5005

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

5006

'playable_in_embed': True,

5007

'release_date': '20220118',

5008

'availability': 'public',

5009

'live_status': 'is_live',

5010

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5011

'channel_follower_count': int

5012

},

5013

'params': {

5014

'skip_download': True,

5015

},

5016

'expected_warnings': ['Ignoring subtitle tracks found in '],

5017

}, {

5018

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5023

'uploader': 'The Young Turks',

5024

'uploader_id': 'TheYoungTurks',

5025

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5026

'upload_date': '20150715',

5027

'license': 'Standard YouTube License',

5028

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5029

'categories': ['News & Politics'],

5030

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5035

},

5036

'only_matching': True,

5037

}, {

5038

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5039

'only_matching': True,

5040

}, {

5041

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5042

'only_matching': True,

5043

}, {

5044

'note': 'A channel that is not live. Should raise error',

5045

'url': 'https://www.youtube.com/user/numberphile/live',

5046

'only_matching': True,

5047

}, {

5048

'url': 'https://www.youtube.com/feed/trending',

5049

'only_matching': True,

5050

}, {

5051

'url': 'https://www.youtube.com/feed/library',

5052

'only_matching': True,

5053

}, {

5054

'url': 'https://www.youtube.com/feed/history',

5055

'only_matching': True,

5056

}, {

5057

'url': 'https://www.youtube.com/feed/subscriptions',

5058

'only_matching': True,

5059

}, {

5060

'url': 'https://www.youtube.com/feed/watch_later',

5061

'only_matching': True,

5062

}, {

5063

'note': 'Recommended - redirects to home page.',

5064

'url': 'https://www.youtube.com/feed/recommended',

5065

'only_matching': True,

5066

}, {

5067

'note': 'inline playlist with not always working continuations',

5068

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5069

'only_matching': True,

5070

}, {

5071

'url': 'https://www.youtube.com/course',

5072

'only_matching': True,

5073

}, {

5074

'url': 'https://www.youtube.com/zsecurity',

5075

'only_matching': True,

5076

}, {

5077

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5078

'only_matching': True,

5079

}, {

5080

'url': 'https://www.youtube.com/TheYoungTurks/live',

5081

'only_matching': True,

5082

}, {

5083

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5090

}, {

5091

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5092

'only_matching': True,

5093

}, {

5094

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5095

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5096

'only_matching': True

5097

}, {

5098

'note': '/browse/ should redirect to /channel/',

5099

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5100

'only_matching': True

5101

}, {

5102

'note': 'VLPL, should redirect to playlist?list=PL...',

5103

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5104

'info_dict': {

5105

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5106

'uploader': 'NoCopyrightSounds',

5107

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5108

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5109

'title': 'NCS Releases',

5110

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5111

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5112

'modified_date': r're:\d{8}',

5113

'view_count': int,

5114

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5115

'tags': [],

5116

'channel': 'NoCopyrightSounds',

5117

},

5118

'playlist_mincount': 166,

5119

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5120

}, {

5121

'note': 'Topic, should redirect to playlist?list=UU...',

5122

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5123

'info_dict': {

5124

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5125

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5126

'title': 'Uploads from Royalty Free Music - Topic',

5127

'uploader': 'Royalty Free Music - Topic',

5128

'tags': [],

5129

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5130

'channel': 'Royalty Free Music - Topic',

5131

'view_count': int,

5132

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5133

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5134

'modified_date': r're:\d{8}',

5135

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5136

'description': '',

5137

},

5138

'expected_warnings': [

5139

'The URL does not have a videos tab',

5140

r'[Uu]navailable videos (are|will be) hidden',

5141

],

5142

'playlist_mincount': 101,

5143

}, {

5144

'note': 'Topic without a UU playlist',

5145

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5146

'info_dict': {

5147

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5148

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5149

'tags': [],

5150

},

5151

'expected_warnings': [

5152

'the playlist redirect gave error',

5153

],

5154

'playlist_mincount': 9,

5155

}, {

5156

'note': 'Youtube music Album',

5157

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5158

'info_dict': {

5159

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5160

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5165

'modified_date': r're:\d{8}',

5166

},

5167

'playlist_count': 50,

5168

}, {

5169

'note': 'unlisted single video playlist',

5170

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5171

'info_dict': {

5172

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5173

'uploader': 'colethedj',

5174

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5175

'title': 'yt-dlp unlisted playlist test',

5176

'availability': 'unlisted',

5177

'tags': [],

5178

'modified_date': '20211208',

5179

'channel': 'colethedj',

5180

'view_count': int,

5181

'description': '',

5182

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5183

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5184

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5189

'url': 'https://www.youtube.com/feed/recommended',

5190

'info_dict': {

5191

'id': 'recommended',

5192

'title': 'recommended',

5193

'tags': [],

5194

},

5195

'playlist_mincount': 50,

5196

'params': {

5197

'skip_download': True,

5198

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5199

},

5200

}, {

5201

'note': 'API Fallback: /videos tab, sorted by oldest first',

5202

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5203

'info_dict': {

5204

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5205

'title': 'Cody\'sLab - Videos',

5206

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5207

'uploader': 'Cody\'sLab',

5208

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5209

'channel': 'Cody\'sLab',

5210

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5211

'tags': [],

5212

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5213

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5214

'channel_follower_count': int

5215

},

5216

'playlist_mincount': 650,

5217

'params': {

5218

'skip_download': True,

5219

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5220

},

5221

}, {

5222

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5223

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5224

'info_dict': {

5225

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5226

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5227

'title': 'Uploads from Royalty Free Music - Topic',

5228

'uploader': 'Royalty Free Music - Topic',

5229

'modified_date': r're:\d{8}',

5230

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5231

'description': '',

5232

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5233

'tags': [],

5234

'channel': 'Royalty Free Music - Topic',

5235

'view_count': int,

5236

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5237

},

5238

'expected_warnings': [

5239

'does not have a videos tab',

5240

r'[Uu]navailable videos (are|will be) hidden',

5241

],

5242

'playlist_mincount': 101,

5243

'params': {

5244

'skip_download': True,

5245

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5246

},

5247

}, {

5248

'note': 'non-standard redirect to regional channel',

5249

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5250

'only_matching': True

5251

}, {

5252

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5253

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5254

'info_dict': {

5255

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5256

'modified_date': '20220407',

5257

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5258

'tags': [],

5259

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5260

'uploader': 'pukkandan',

5261

'availability': 'unlisted',

5262

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5263

'channel': 'pukkandan',

5264

'description': 'Test for collaborative playlist',

5265

'title': 'yt-dlp test - collaborative playlist',

5266

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5267

},

5268

'playlist_mincount': 2

}]

@classmethod
def suitable(cls, url):
    """Decline URLs that YoutubeIE accepts; otherwise use the inherited matching."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5274

5275

# Splits a URL into three named parts: `pre` (whatever _VALID_URL matches), an optional
# `tab` path segment (`/` followed by word characters) and `post` (the rest of the string).
# The conditional group `(?(not_channel)|...)` only tries to match a tab when the
# `not_channel` group did not participate in the match; that group is expected to be
# defined inside _VALID_URL, which is outside this view.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5276

5277

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab/playlist/channel URL to a playlist result or a redirect.

    The URL host is forced to www.youtube.com, then the URL is split with
    _URL_RE and rewritten as needed (YouTube Music ids, bare channel pages,
    watch URLs without a video id) before the page data is fetched with
    _extract_data. Depending on what the data contains, the result comes from
    _extract_from_tabs, _extract_from_playlist, or a url_result pointing at a
    single video or another tab URL.

    `smuggled_data` is presumably supplied by the passthrough_smuggled_data
    decorator (defined outside this view); only its 'is_music_url' key is read here.

    Raises ExtractorError when an album cannot be resolved to a playlist, when
    an explicitly requested tab does not exist, when the /live tab did not
    redirect to a video, or when the page cannot be recognized at all.
    """
    item_id = self._match_id(url)
    # Normalize the host so that all further requests go to www.youtube.com
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match against _URL_RE and replace unmatched (None) groups with ''
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=compat_str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember whether the user asked for a specific tab before any automatic rewrite below
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and trailing part of the URL on the redirect target
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]  # drop the leading '/'
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if not original_tab_name:
                    # The tab was added automatically above, so fall back instead of failing
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    # The user explicitly requested a tab that is not present
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: hand over to the single-video extractor
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')

5406

5407

5408

class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids and youtube/invidious URLs carrying a `list=` query
    # parameter, and forwards them to YoutubeTabIE as a /playlist URL.
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle `url`.

        URLs accepted by YoutubeTabIE, and URLs whose query string carries a
        non-empty `v` parameter, are rejected; everything else falls back to
        the inherited matching.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs is already imported at module level; no local import is needed
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a www.youtube.com/playlist URL and delegate to YoutubeTabIE.

        The original query string is reused when there is one; otherwise a
        `list=<playlist id>` query is built (the bare-id case). If
        YoutubeBaseInfoExtractor.is_music_url() is true for the input, that
        fact is smuggled into the resulting URL.
        """
        playlist_id = self._match_id(url)
        # Must be evaluated before `url` is overwritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5517

5518

5519

class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that carry both a video id and a `list=` playlist id.
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5566

5567

5568

class YoutubeLivestreamEmbedIE(InfoExtractor):
    # Maps /embed/live_stream?channel=<id> URLs onto the channel's /live tab.
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's /live URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5581

5582

5583

class YoutubeYtUserIE(InfoExtractor):
    # Expands the "ytuser:<name>" shorthand to the user's /videos page.
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the user's /videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5597

5598

5599

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Keyword extractor: ":ytfav" and its spelling variants resolve to the "LL" playlist.
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed playlist?list=LL URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

5616

5617

5618

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    # Keyword extractor that pages through the notification menu API endpoint
    # and yields one url-type entry per notification.
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield entries from one API response.

        `continuation_list` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last continuation renderer
        found among the items, if any.
        """
        # Either the initial popup payload or a continuation payload
        items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for menu_item in items:
            result = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if result:
                yield result
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Build a url-type entry for one notification renderer.

        Returns None when the notification points to neither a video nor a
        community post with both a channel id and a post id.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The date is only derived from the "sent" text when `approximate_date` is configured
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            upload_date = strftime_or_none(
                self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries page by page until a response carries no continuation."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return a playlist named 'notifications' backed by the paged entry generator."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)

5707

5708

5709

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Declarative search extractor: only the search key and params are set here;
    # the extraction logic comes from the base classes.
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Same declarative shape as YoutubeSearchIE, with its own search key and params.
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles /results and /search URLs; the query comes from `search_query` or `q`
    # and the optional `sp` parameter is passed through to the search.
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            'entries': [{
                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                'title': '#cats',
            }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results, using the query as both id and title."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q` when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

5777

5778

5779

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles music.youtube.com/search URLs. A result section can be selected
    # either through the `sp` query parameter or through a `#section` fragment.
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment) -> `sp` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of music search results.

        The playlist id/title is the query, with " - <section>" appended when a
        section could be determined.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Name the section after the matching _SECTIONS key; unknown values
            # are used verbatim as the section label
            section = params
            for name, token in self._SECTIONS.items():
                if token == params:
                    section = name
                    break
        else:
            # No explicit `sp`: look the section up by the text after the first '#'
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = compat_urllib_parse_unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

5830

5831

5832

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed keyword extractors.
    Each subclass is expected to override _FEED_NAME.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # Reuse the login check implemented on YoutubeBaseInfoExtractor
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(self):
        # Derived from _FEED_NAME, so each subclass gets its own name
        feed = self._FEED_NAME
        return f'youtube:{feed}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/<_FEED_NAME> URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5850

5851

5852

class YoutubeWatchLaterIE(InfoExtractor):
    # Keyword extractor: ":ytwatchlater" resolves to the "WL" playlist.
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed playlist?list=WL URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5864

5865

5866

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home page as well as the ":ytrec" keyword.
    # Unlike the base class, this feed does not set _LOGIN_REQUIRED.
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Keyword extractor for the 'subscriptions' feed.
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'history' feed. The keyword pattern accepts
    # both ':ythis' and ':ythistory'.
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{'url': ':ythistory', 'only_matching': True}]

class YoutubeStoriesIE(InfoExtractor):
    # Handles the 'ytstories:' pseudo-URL prefix. The ID captured after the
    # 'UC' prefix is turned into an 'RLTD...' playlist ID and the resulting
    # playlist URL is passed on to YoutubeTabIE.
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', 'only_matching': True}]

    def _real_extract(self, url):
        """Build the 'RLTD<id>' playlist URL and delegate it to YoutubeTabIE."""
        matched_id = self._match_id(url)
        stories_list_id = f'RLTD{matched_id}'
        stories_url = f'https://www.youtube.com/playlist?list={stories_list_id}&playnext=1'
        return self.url_result(stories_url, ie=YoutubeTabIE, video_id=stories_list_id)

5920

5921

5922

class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that lost their video ID, typically
    # because an unquoted '&' made the shell cut the URL short, and turns
    # them into a helpful error instead of a confusing extraction failure.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose regex: optional scheme, optional subdomain, 'youtube' in any
    # letter case (optionally '-nocookie'), then either 'watch?' followed by
    # at most one of the listed non-ID parameters, or 'attribution_link?a=...';
    # nothing may follow.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """
        Always fail with an explanatory message.

        Raises ExtractorError (expected=True) telling the user to quote the
        URL or pass the bare video ID.
        """
        # The hint must name this program's own executable (yt-dlp); it
        # previously told users to run 'youtube-dl'.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(InfoExtractor):
    # Placeholder for clip URLs: warns that clips are unsupported and hands
    # the unchanged URL to the 'Generic' extractor.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list

    def _real_extract(self, url):
        """Emit the unsupported-clip warning, then defer to 'Generic'."""
        unsupported_notice = (
            'YouTube clips are not currently supported. '
            'The entire video will be downloaded instead')
        self.report_warning(unsupported_notice)
        return self.url_result(url, 'Generic')

5979

5980

5981

class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose 'v' value is only 1 to 10 characters long
    # and reports them as truncated instead of attempting extraction.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list

    _TESTS = [{'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True}]

    def _real_extract(self, url):
        """Always raise ExtractorError (expected=True) naming the short ID and the URL."""
        partial_id = self._match_id(url)
        complaint = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(complaint, expected=True)