jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import hashlib
	6	import itertools
	7	import json
	8	import math
	9	import os.path
	10	import random
	11	import re
	12	import sys
	13	import threading
	14	import time
	15	import traceback
	16	import urllib.error
	17	import urllib.parse
	18
	19	from .common import InfoExtractor, SearchInfoExtractor
	20	from ..compat import functools
	21	from ..jsinterp import JSInterpreter
	22	from ..utils import (
	23	NO_DEFAULT,
	24	ExtractorError,
	25	UserNotLive,
	26	bug_reports_message,
	27	classproperty,
	28	clean_html,
	29	datetime_from_str,
	30	dict_get,
	31	error_to_compat_str,
	32	float_or_none,
	33	format_field,
	34	get_first,
	35	int_or_none,
	36	is_html,
	37	join_nonempty,
	38	js_to_json,
	39	mimetype2ext,
	40	network_exceptions,
	41	orderedSet,
	42	parse_codecs,
	43	parse_count,
	44	parse_duration,
	45	parse_iso8601,
	46	parse_qs,
	47	qualities,
	48	remove_end,
	49	remove_start,
	50	smuggle_url,
	51	str_or_none,
	52	str_to_int,
	53	strftime_or_none,
	54	traverse_obj,
	55	try_get,
	56	unescapeHTML,
	57	unified_strdate,
	58	unified_timestamp,
	59	unsmuggle_url,
	60	update_url_query,
	61	url_or_none,
	62	urljoin,
	63	variadic,
	64	)
	65
	66	# any clients starting with _ cannot be explicitly requested by the user
	67	INNERTUBE_CLIENTS = {
	68	'web': {
	69	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	70	'INNERTUBE_CONTEXT': {
	71	'client': {
	72	'clientName': 'WEB',
	73	'clientVersion': '2.20211221.00.00',
	74	}
	75	},
	76	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	77	},
	78	'web_embedded': {
	79	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	80	'INNERTUBE_CONTEXT': {
	81	'client': {
	82	'clientName': 'WEB_EMBEDDED_PLAYER',
	83	'clientVersion': '1.20211215.00.01',
	84	},
	85	},
	86	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	87	},
	88	'web_music': {
	89	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	90	'INNERTUBE_HOST': 'music.youtube.com',
	91	'INNERTUBE_CONTEXT': {
	92	'client': {
	93	'clientName': 'WEB_REMIX',
	94	'clientVersion': '1.20211213.00.00',
	95	}
	96	},
	97	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	98	},
	99	'web_creator': {
	100	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	101	'INNERTUBE_CONTEXT': {
	102	'client': {
	103	'clientName': 'WEB_CREATOR',
	104	'clientVersion': '1.20211220.02.00',
	105	}
	106	},
	107	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	108	},
	109	'android': {
	110	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	111	'INNERTUBE_CONTEXT': {
	112	'client': {
	113	'clientName': 'ANDROID',
	114	'clientVersion': '16.49',
	115	}
	116	},
	117	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	118	'REQUIRE_JS_PLAYER': False
	119	},
	120	'android_embedded': {
	121	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	122	'INNERTUBE_CONTEXT': {
	123	'client': {
	124	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	125	'clientVersion': '16.49',
	126	},
	127	},
	128	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	129	'REQUIRE_JS_PLAYER': False
	130	},
	131	'android_music': {
	132	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	133	'INNERTUBE_CONTEXT': {
	134	'client': {
	135	'clientName': 'ANDROID_MUSIC',
	136	'clientVersion': '4.57',
	137	}
	138	},
	139	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	140	'REQUIRE_JS_PLAYER': False
	141	},
	142	'android_creator': {
	143	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	144	'INNERTUBE_CONTEXT': {
	145	'client': {
	146	'clientName': 'ANDROID_CREATOR',
	147	'clientVersion': '21.47',
	148	},
	149	},
	150	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	151	'REQUIRE_JS_PLAYER': False
	152	},
	153	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	154	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	155	'ios': {
	156	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	157	'INNERTUBE_CONTEXT': {
	158	'client': {
	159	'clientName': 'IOS',
	160	'clientVersion': '16.46',
	161	'deviceModel': 'iPhone14,3',
	162	}
	163	},
	164	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	165	'REQUIRE_JS_PLAYER': False
	166	},
	167	'ios_embedded': {
	168	'INNERTUBE_CONTEXT': {
	169	'client': {
	170	'clientName': 'IOS_MESSAGES_EXTENSION',
	171	'clientVersion': '16.46',
	172	'deviceModel': 'iPhone14,3',
	173	},
	174	},
	175	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	176	'REQUIRE_JS_PLAYER': False
	177	},
	178	'ios_music': {
	179	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	180	'INNERTUBE_CONTEXT': {
	181	'client': {
	182	'clientName': 'IOS_MUSIC',
	183	'clientVersion': '4.57',
	184	},
	185	},
	186	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	187	'REQUIRE_JS_PLAYER': False
	188	},
	189	'ios_creator': {
	190	'INNERTUBE_CONTEXT': {
	191	'client': {
	192	'clientName': 'IOS_CREATOR',
	193	'clientVersion': '21.47',
	194	},
	195	},
	196	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	197	'REQUIRE_JS_PLAYER': False
	198	},
	199	# mweb has 'ultralow' formats
	200	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	201	'mweb': {
	202	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	203	'INNERTUBE_CONTEXT': {
	204	'client': {
	205	'clientName': 'MWEB',
	206	'clientVersion': '2.20211221.01.00',
	207	}
	208	},
	209	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	210	},
	211	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	212	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	213	'tv_embedded': {
	214	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	215	'INNERTUBE_CONTEXT': {
	216	'client': {
	217	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	218	'clientVersion': '2.0',
	219	},
	220	},
	221	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	222	},
	223	}
	224
	225
	226	def _split_innertube_client(client_name):
	227	variant, *base = client_name.rsplit('.', 1)
	228	if base:
	229	return variant, base[0], variant
	230	base, *variant = client_name.split('_', 1)
	231	return client_name, base, variant[0] if variant else None
	232
	233
	234	def build_innertube_clients():
	235	THIRD_PARTY = {
	236	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	237	}
	238	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	239	priority = qualities(BASE_CLIENTS[::-1])
	240
	241	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	242	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	243	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	244	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	245	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	246
	247	_, base_client, variant = _split_innertube_client(client)
	248	ytcfg['priority'] = 10 * priority(base_client)
	249
	250	if not variant:
	251	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	252	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	253	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	254	embedscreen['priority'] -= 3
	255	elif variant == 'embedded':
	256	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	257	ytcfg['priority'] -= 2
	258	else:
	259	ytcfg['priority'] -= 3
	260
	261
	262	build_innertube_clients()
	263
	264
	265	class YoutubeBaseInfoExtractor(InfoExtractor):
	266	"""Provide base functions for Youtube extractors"""
	267
	268	_RESERVED_NAMES = (
	269	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	270	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	271	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	272	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	273
	274	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	275
	276	# _NETRC_MACHINE = 'youtube'
	277
	278	# If True it will raise an error if no login info is provided
	279	_LOGIN_REQUIRED = False
	280
	281	_INVIDIOUS_SITES = (
	282	# invidious-redirect websites
	283	r'(?:www\.)?redirect\.invidious\.io',
	284	r'(?:(?:www\|dev)\.)?invidio\.us',
	285	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	286	r'(?:www\.)?invidious\.pussthecat\.org',
	287	r'(?:www\.)?invidious\.zee\.li',
	288	r'(?:www\.)?invidious\.ethibox\.fr',
	289	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	290	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	291	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	292	# youtube-dl invidious instances list
	293	r'(?:(?:www\|no)\.)?invidiou\.sh',
	294	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	295	r'(?:www\.)?invidious\.kabi\.tk',
	296	r'(?:www\.)?invidious\.mastodon\.host',
	297	r'(?:www\.)?invidious\.zapashcanon\.fr',
	298	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	299	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	300	r'(?:www\.)?invidious\.himiko\.cloud',
	301	r'(?:www\.)?invidious\.reallyancient\.tech',
	302	r'(?:www\.)?invidious\.tube',
	303	r'(?:www\.)?invidiou\.site',
	304	r'(?:www\.)?invidious\.site',
	305	r'(?:www\.)?invidious\.xyz',
	306	r'(?:www\.)?invidious\.nixnet\.xyz',
	307	r'(?:www\.)?invidious\.048596\.xyz',
	308	r'(?:www\.)?invidious\.drycat\.fr',
	309	r'(?:www\.)?inv\.skyn3t\.in',
	310	r'(?:www\.)?tube\.poal\.co',
	311	r'(?:www\.)?tube\.connect\.cafe',
	312	r'(?:www\.)?vid\.wxzm\.sx',
	313	r'(?:www\.)?vid\.mint\.lgbt',
	314	r'(?:www\.)?vid\.puffyan\.us',
	315	r'(?:www\.)?yewtu\.be',
	316	r'(?:www\.)?yt\.elukerio\.org',
	317	r'(?:www\.)?yt\.lelux\.fi',
	318	r'(?:www\.)?invidious\.ggc-project\.de',
	319	r'(?:www\.)?yt\.maisputain\.ovh',
	320	r'(?:www\.)?ytprivate\.com',
	321	r'(?:www\.)?invidious\.13ad\.de',
	322	r'(?:www\.)?invidious\.toot\.koeln',
	323	r'(?:www\.)?invidious\.fdn\.fr',
	324	r'(?:www\.)?watch\.nettohikari\.com',
	325	r'(?:www\.)?invidious\.namazso\.eu',
	326	r'(?:www\.)?invidious\.silkky\.cloud',
	327	r'(?:www\.)?invidious\.exonip\.de',
	328	r'(?:www\.)?invidious\.riverside\.rocks',
	329	r'(?:www\.)?invidious\.blamefran\.net',
	330	r'(?:www\.)?invidious\.moomoo\.de',
	331	r'(?:www\.)?ytb\.trom\.tf',
	332	r'(?:www\.)?yt\.cyberhost\.uk',
	333	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	334	r'(?:www\.)?qklhadlycap4cnod\.onion',
	335	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	336	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	337	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	338	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	339	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	340	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	341	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	342	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	343	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	344	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	345	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	346	r'(?:www\.)?piped\.kavin\.rocks',
	347	r'(?:www\.)?piped\.silkky\.cloud',
	348	r'(?:www\.)?piped\.tokhmi\.xyz',
	349	r'(?:www\.)?piped\.moomoo\.me',
	350	r'(?:www\.)?il\.ax',
	351	r'(?:www\.)?piped\.syncpundit\.com',
	352	r'(?:www\.)?piped\.mha\.fi',
	353	r'(?:www\.)?piped\.mint\.lgbt',
	354	r'(?:www\.)?piped\.privacy\.com\.de',
	355	)
	356
	357	def _initialize_consent(self):
	358	cookies = self._get_cookies('https://www.youtube.com/')
	359	if cookies.get('__Secure-3PSID'):
	360	return
	361	consent_id = None
	362	consent = cookies.get('CONSENT')
	363	if consent:
	364	if 'YES' in consent.value:
	365	return
	366	consent_id = self._search_regex(
	367	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	368	if not consent_id:
	369	consent_id = random.randint(100, 999)
	370	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	371
	372	def _initialize_pref(self):
	373	cookies = self._get_cookies('https://www.youtube.com/')
	374	pref_cookie = cookies.get('PREF')
	375	pref = {}
	376	if pref_cookie:
	377	try:
	378	pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
	379	except ValueError:
	380	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	381	pref.update({'hl': 'en', 'tz': 'UTC'})
	382	self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
	383
	384	def _real_initialize(self):
	385	self._initialize_pref()
	386	self._initialize_consent()
	387	self._check_login_required()
	388
	389	def _check_login_required(self):
	390	if self._LOGIN_REQUIRED and not self._cookies_passed:
	391	self.raise_login_required('Login details are needed to download this content', method='cookies')
	392
	393	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	394	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	395
	396	def _get_default_ytcfg(self, client='web'):
	397	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	398
	399	def _get_innertube_host(self, client='web'):
	400	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	401
	402	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	403	# try_get but with fallback to default ytcfg client values when present
	404	_func = lambda y: try_get(y, getter, expected_type)
	405	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	406
	407	def _extract_client_name(self, ytcfg, default_client='web'):
	408	return self._ytcfg_get_safe(
	409	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	410	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
	411
	412	def _extract_client_version(self, ytcfg, default_client='web'):
	413	return self._ytcfg_get_safe(
	414	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	415	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
	416
	417	def _select_api_hostname(self, req_api_hostname, default_client=None):
	418	return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
	419	or req_api_hostname or self._get_innertube_host(default_client or 'web'))
	420
	421	def _extract_api_key(self, ytcfg=None, default_client='web'):
	422	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
	423
	424	def _extract_context(self, ytcfg=None, default_client='web'):
	425	context = get_first(
	426	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	427	# Enforce language and tz for extraction
	428	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	429	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	430	return context
	431
	432	_SAPISID = None
	433
	434	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	435	time_now = round(time.time())
	436	if self._SAPISID is None:
	437	yt_cookies = self._get_cookies('https://www.youtube.com')
	438	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	439	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	440	sapisid_cookie = dict_get(
	441	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	442	if sapisid_cookie and sapisid_cookie.value:
	443	self._SAPISID = sapisid_cookie.value
	444	self.write_debug('Extracted SAPISID cookie')
	445	# SAPISID cookie is required if not already present
	446	if not yt_cookies.get('SAPISID'):
	447	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	448	self._set_cookie(
	449	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	450	else:
	451	self._SAPISID = False
	452	if not self._SAPISID:
	453	return None
	454	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	455	sapisidhash = hashlib.sha1(
	456	f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
	457	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	458
	459	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	460	note='Downloading API JSON', errnote='Unable to download API page',
	461	context=None, api_key=None, api_hostname=None, default_client='web'):
	462
	463	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	464	data.update(query)
	465	real_headers = self.generate_api_headers(default_client=default_client)
	466	real_headers.update({'content-type': 'application/json'})
	467	if headers:
	468	real_headers.update(headers)
	469	api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
	470	or api_key or self._extract_api_key(default_client=default_client))
	471	return self._download_json(
	472	f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
	473	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	474	data=json.dumps(data).encode('utf8'), headers=real_headers,
	475	query={'key': api_key, 'prettyPrint': 'false'})
	476
	477	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	478	return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
	479
	480	@staticmethod
	481	def _extract_session_index(*data):
	482	"""
	483	Index of current account in account list.
	484	See: https://github.com/yt-dlp/yt-dlp/pull/519
	485	"""
	486	for ytcfg in data:
	487	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	488	if session_index is not None:
	489	return session_index
	490
	491	# Deprecated?
	492	def _extract_identity_token(self, ytcfg=None, webpage=None):
	493	if ytcfg:
	494	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
	495	if token:
	496	return token
	497	if webpage:
	498	return self._search_regex(
	499	r'\bID_TOKEN["\']\s:\s["\'](.+?)["\']', webpage,
	500	'identity token', default=None, fatal=False)

1

import base64

import calendar

import copy

import datetime

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

20

from ..compat import functools

21

from ..jsinterp import JSInterpreter

22

from ..utils import (

NO_DEFAULT,

ExtractorError,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

67

INNERTUBE_CLIENTS = {

68

'web': {

69

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

70

'INNERTUBE_CONTEXT': {

71

'client': {

72

'clientName': 'WEB',

73

'clientVersion': '2.20211221.00.00',

74

}

75

},

76

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

77

},

78

'web_embedded': {

79

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

80

'INNERTUBE_CONTEXT': {

81

'client': {

82

'clientName': 'WEB_EMBEDDED_PLAYER',

83

'clientVersion': '1.20211215.00.01',

84

},

85

},

86

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

87

},

88

'web_music': {

89

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

90

'INNERTUBE_HOST': 'music.youtube.com',

91

'INNERTUBE_CONTEXT': {

92

'client': {

93

'clientName': 'WEB_REMIX',

94

'clientVersion': '1.20211213.00.00',

95

}

96

},

97

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

98

},

99

'web_creator': {

100

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

101

'INNERTUBE_CONTEXT': {

102

'client': {

103

'clientName': 'WEB_CREATOR',

104

'clientVersion': '1.20211220.02.00',

105

}

106

},

107

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

108

},

109

'android': {

110

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

111

'INNERTUBE_CONTEXT': {

112

'client': {

113

'clientName': 'ANDROID',

114

'clientVersion': '16.49',

115

}

116

},

117

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

118

'REQUIRE_JS_PLAYER': False

119

},

120

'android_embedded': {

121

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

122

'INNERTUBE_CONTEXT': {

123

'client': {

124

'clientName': 'ANDROID_EMBEDDED_PLAYER',

125

'clientVersion': '16.49',

126

},

127

},

128

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

129

'REQUIRE_JS_PLAYER': False

130

},

131

'android_music': {

132

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

133

'INNERTUBE_CONTEXT': {

134

'client': {

135

'clientName': 'ANDROID_MUSIC',

136

'clientVersion': '4.57',

137

}

138

},

139

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

140

'REQUIRE_JS_PLAYER': False

141

},

142

'android_creator': {

143

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

144

'INNERTUBE_CONTEXT': {

145

'client': {

146

'clientName': 'ANDROID_CREATOR',

147

'clientVersion': '21.47',

148

},

149

},

150

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

151

'REQUIRE_JS_PLAYER': False

152

},

153

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

154

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

155

'ios': {

156

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

157

'INNERTUBE_CONTEXT': {

158

'client': {

159

'clientName': 'IOS',

160

'clientVersion': '16.46',

161

'deviceModel': 'iPhone14,3',

162

}

163

},

164

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

165

'REQUIRE_JS_PLAYER': False

166

},

167

'ios_embedded': {

168

'INNERTUBE_CONTEXT': {

169

'client': {

170

'clientName': 'IOS_MESSAGES_EXTENSION',

171

'clientVersion': '16.46',

172

'deviceModel': 'iPhone14,3',

173

},

174

},

175

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

176

'REQUIRE_JS_PLAYER': False

177

},

178

'ios_music': {

179

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

180

'INNERTUBE_CONTEXT': {

181

'client': {

182

'clientName': 'IOS_MUSIC',

183

'clientVersion': '4.57',

184

},

185

},

186

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

187

'REQUIRE_JS_PLAYER': False

188

},

189

'ios_creator': {

190

'INNERTUBE_CONTEXT': {

191

'client': {

192

'clientName': 'IOS_CREATOR',

193

'clientVersion': '21.47',

194

},

195

},

196

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

197

'REQUIRE_JS_PLAYER': False

198

},

199

# mweb has 'ultralow' formats

200

# See: https://github.com/yt-dlp/yt-dlp/pull/557

201

'mweb': {

202

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

203

'INNERTUBE_CONTEXT': {

204

'client': {

205

'clientName': 'MWEB',

206

'clientVersion': '2.20211221.01.00',

207

}

208

},

209

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

210

},

211

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

212

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

213

'tv_embedded': {

214

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

215

'INNERTUBE_CONTEXT': {

216

'client': {

217

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

218

'clientVersion': '2.0',

219

},

220

},

221

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

227

variant, *base = client_name.rsplit('.', 1)

228

if base:

229

return variant, base[0], variant

230

base, *variant = client_name.split('_', 1)

231

return client_name, base, variant[0] if variant else None

232

233

234

def build_innertube_clients():

235

THIRD_PARTY = {

236

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

237

}

238

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

239

priority = qualities(BASE_CLIENTS[::-1])

240

241

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

242

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

243

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

244

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

245

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

246

247

_, base_client, variant = _split_innertube_client(client)

248

ytcfg['priority'] = 10 * priority(base_client)

249

250

if not variant:

251

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

252

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

253

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

254

embedscreen['priority'] -= 3

255

elif variant == 'embedded':

256

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

257

ytcfg['priority'] -= 2

258

else:

259

ytcfg['priority'] -= 3

260

261

262

build_innertube_clients()

263

264

265

class YoutubeBaseInfoExtractor(InfoExtractor):

266

"""Provide base functions for Youtube extractors"""

267

268

_RESERVED_NAMES = (

269

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

275

276

# _NETRC_MACHINE = 'youtube'

277

278

# If True it will raise an error if no login info is provided

279

_LOGIN_REQUIRED = False

280

281

_INVIDIOUS_SITES = (

282

# invidious-redirect websites

283

r'(?:www\.)?redirect\.invidious\.io',

284

r'(?:(?:www|dev)\.)?invidio\.us',

285

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

286

r'(?:www\.)?invidious\.pussthecat\.org',

287

r'(?:www\.)?invidious\.zee\.li',

288

r'(?:www\.)?invidious\.ethibox\.fr',

289

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

290

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

291

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

292

# youtube-dl invidious instances list

293

r'(?:(?:www|no)\.)?invidiou\.sh',

294

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

295

r'(?:www\.)?invidious\.kabi\.tk',

296

r'(?:www\.)?invidious\.mastodon\.host',

297

r'(?:www\.)?invidious\.zapashcanon\.fr',

298

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

299

r'(?:www\.)?invidious\.tinfoil-hat\.net',

300

r'(?:www\.)?invidious\.himiko\.cloud',

301

r'(?:www\.)?invidious\.reallyancient\.tech',

302

r'(?:www\.)?invidious\.tube',

303

r'(?:www\.)?invidiou\.site',

304

r'(?:www\.)?invidious\.site',

305

r'(?:www\.)?invidious\.xyz',

306

r'(?:www\.)?invidious\.nixnet\.xyz',

307

r'(?:www\.)?invidious\.048596\.xyz',

308

r'(?:www\.)?invidious\.drycat\.fr',

309

r'(?:www\.)?inv\.skyn3t\.in',

310

r'(?:www\.)?tube\.poal\.co',

311

r'(?:www\.)?tube\.connect\.cafe',

312

r'(?:www\.)?vid\.wxzm\.sx',

313

r'(?:www\.)?vid\.mint\.lgbt',

314

r'(?:www\.)?vid\.puffyan\.us',

315

r'(?:www\.)?yewtu\.be',

316

r'(?:www\.)?yt\.elukerio\.org',

317

r'(?:www\.)?yt\.lelux\.fi',

318

r'(?:www\.)?invidious\.ggc-project\.de',

319

r'(?:www\.)?yt\.maisputain\.ovh',

320

r'(?:www\.)?ytprivate\.com',

321

r'(?:www\.)?invidious\.13ad\.de',

322

r'(?:www\.)?invidious\.toot\.koeln',

323

r'(?:www\.)?invidious\.fdn\.fr',

324

r'(?:www\.)?watch\.nettohikari\.com',

325

r'(?:www\.)?invidious\.namazso\.eu',

326

r'(?:www\.)?invidious\.silkky\.cloud',

327

r'(?:www\.)?invidious\.exonip\.de',

328

r'(?:www\.)?invidious\.riverside\.rocks',

329

r'(?:www\.)?invidious\.blamefran\.net',

330

r'(?:www\.)?invidious\.moomoo\.de',

331

r'(?:www\.)?ytb\.trom\.tf',

332

r'(?:www\.)?yt\.cyberhost\.uk',

333

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

334

r'(?:www\.)?qklhadlycap4cnod\.onion',

335

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

336

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

337

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

338

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

339

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

340

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

341

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

342

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

343

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

344

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

345

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

346

r'(?:www\.)?piped\.kavin\.rocks',

347

r'(?:www\.)?piped\.silkky\.cloud',

348

r'(?:www\.)?piped\.tokhmi\.xyz',

349

r'(?:www\.)?piped\.moomoo\.me',

350

r'(?:www\.)?il\.ax',

351

r'(?:www\.)?piped\.syncpundit\.com',

352

r'(?:www\.)?piped\.mha\.fi',

353

r'(?:www\.)?piped\.mint\.lgbt',

354

r'(?:www\.)?piped\.privacy\.com\.de',

355

)

356

357

def _initialize_consent(self):

358

cookies = self._get_cookies('https://www.youtube.com/')

359

if cookies.get('__Secure-3PSID'):

360

return

361

consent_id = None

362

consent = cookies.get('CONSENT')

363

if consent:

364

if 'YES' in consent.value:

365

return

366

consent_id = self._search_regex(

367

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

368

if not consent_id:

369

consent_id = random.randint(100, 999)

370

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

371

372

def _initialize_pref(self):

373

cookies = self._get_cookies('https://www.youtube.com/')

374

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(urllib.parse.parse_qsl(pref_cookie.value))

379

except ValueError:

380

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

381

pref.update({'hl': 'en', 'tz': 'UTC'})

382

self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))

383

384

def _real_initialize(self):

385

self._initialize_pref()

386

self._initialize_consent()

387

self._check_login_required()

388

389

def _check_login_required(self):

390

if self._LOGIN_REQUIRED and not self._cookies_passed:

391

self.raise_login_required('Login details are needed to download this content', method='cookies')

392

393

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

394

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

395

396

def _get_default_ytcfg(self, client='web'):

397

return copy.deepcopy(INNERTUBE_CLIENTS[client])

398

399

def _get_innertube_host(self, client='web'):

400

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

401

402

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

403

# try_get but with fallback to default ytcfg client values when present

404

_func = lambda y: try_get(y, getter, expected_type)

405

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

406

407

def _extract_client_name(self, ytcfg, default_client='web'):

408

return self._ytcfg_get_safe(

409

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

410

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)

411

412

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version string from `ytcfg`, or from the default client config."""
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)

416

417

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API hostname to use.

    Precedence: the `innertube_host` extractor argument, then `req_api_hostname`,
    then the host of `default_client` ('web' when not given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

420

421

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return `INNERTUBE_API_KEY` from `ytcfg`, or from the default client config."""
    def api_key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_getter, str, default_client)

423

424

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the `INNERTUBE_CONTEXT` dict, taken from `ytcfg` or the default client config.

    The `client` sub-dict (when present) is updated in place so that
    language and timezone are fixed to English/UTC.
    """
    config_sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(config_sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    enforced_locale = {'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0}
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section.update(enforced_locale)
    return context

# Cache used by _generate_sapisidhash_header: None until the cookie lookup has run,
# False when no usable cookie was found, otherwise the cookie value.
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

435

time_now = round(time.time())

436

if self._SAPISID is None:

437

yt_cookies = self._get_cookies('https://www.youtube.com')

438

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

439

# See: https://github.com/yt-dlp/yt-dlp/issues/393

440

sapisid_cookie = dict_get(

441

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

442

if sapisid_cookie and sapisid_cookie.value:

443

self._SAPISID = sapisid_cookie.value

444

self.write_debug('Extracted SAPISID cookie')

445

# SAPISID cookie is required if not already present

446

if not yt_cookies.get('SAPISID'):

447

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

448

self._set_cookie(

449

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

450

else:

451

self._SAPISID = False

452

if not self._SAPISID:

453

return None

454

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

455

sapisidhash = hashlib.sha1(

456

f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()

457

return f'SAPISIDHASH {time_now}_{sapisidhash}'

458

459

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` as JSON to `https://<host>/youtubei/v1/<ep>` and return the parsed response.

    @param context   request context; extracted for `default_client` when falsy
    @param headers   extra headers, applied on top of the generated API headers
    @param api_key   used unless the `innertube_key` extractor argument is set;
                     extracted for `default_client` when neither is given
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})

476

477

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON that follows the ytInitialData assignment and return it."""
    initial_data = self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
    return initial_data

479

480

@staticmethod

481

def _extract_session_index(*data):

482

"""

483

Index of current account in account list.

484

See: https://github.com/yt-dlp/yt-dlp/pull/519

485

"""

486

for ytcfg in data:

487

session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))

488

if session_index is not None:

return session_index

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):

493

if ytcfg:

494

token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)

if token:

return token

if webpage:

return self._search_regex(

499

r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,

500

'identity token', default=None, fatal=False)

501

502

@staticmethod

503

def _extract_account_syncid(*args):

504

"""

505

Extract syncId required to download private playlists of secondary channels

506

@params response and/or ytcfg

507

"""

508

for data in args:

509

# ytcfg includes channel_syncid if on secondary channel

510

delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)

if delegated_sid:

return delegated_sid

sync_ids = (try_get(

data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],

515

lambda x: x['DATASYNC_ID']), str) or '').split('||')

516

if len(sync_ids) >= 2 and sync_ids[1]:

517

# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel

518

# and just "user_syncid||" for primary channel. We only want the channel_syncid

return sync_ids[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)

530

531

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated; computed once and cached."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

534

535

def extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to `ytcfg.set({...});` in `webpage`.

    @returns the parsed dict, or {} when `webpage` is empty, no call is
             found, or the captured text is not valid JSON
    """
    if not webpage:
        return {}
    # The parentheses of the call must be matched literally (`\(` / `\)`);
    # a `$` anchor in their place can never match `ytcfg.set({...});`.
    ytcfg_json = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(ytcfg_json, video_id, fatal=False) or {}

542

543

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an API request.

    Explicit keyword arguments take precedence; anything not given is looked up
    in `ytcfg` (with the usual fallback to the `default_client` config where the
    helper supports it). Headers whose value ends up as None are omitted.
    `Authorization` and `X-Origin` are only added when a SAPISIDHASH value can
    be generated.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    candidate_headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With an account sync id but no known session index, index 0 is sent
        candidate_headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        candidate_headers['Authorization'] = auth
        candidate_headers['X-Origin'] = origin

    return {name: value for name, value in candidate_headers.items() if value is not None}

567

568

def _download_ytcfg(self, client, video_id):

569

url = {

570

'web': 'https://www.youtube.com',

571

'web_music': 'https://music.youtube.com',

572

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(

577

url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')

578

return self.extract_ytcfg(video_id, webpage) or {}

579

580

@staticmethod

581

def _build_api_continuation_query(continuation, ctp=None):

582

query = {

583

'continuation': continuation

584

}

585

# TODO: Inconsistency with clickTrackingParams.

586

# Currently we have a fixed ctp contained within context (from ytcfg)

587

# and a ctp in root query for continuation.

588

if ctp:

589

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from `nextContinuationData` / `reloadContinuationData`.

    @returns the query dict, or None when the renderer carries no such continuation token
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

604

605

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    @returns the query dict, or None when `continuation_ep` is not a dict
             or has no `continuationCommand` token
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

614

615

@classmethod
def _extract_continuation(cls, renderer):
    """Find a continuation query for `renderer`.

    The renderer's own continuation data is tried first; otherwise the first
    entry of its `contents`/`items` lists that has a `continuationItemRenderer`
    with a usable endpoint is used.

    @returns the query dict, or None when no continuation is found
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in (try_get(renderer, lambda x: x[key], list) or [])
    ]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if isinstance(entry, dict):
            endpoint = try_get(entry, endpoint_getters, dict)
            query = cls._extract_continuation_ep_data(endpoint)
            if query:
                return query
    return None

@classmethod
def _extract_alerts(cls, data):
    """Yield `(alert_type, message)` for every alert in `data['alerts']`.

    Alerts without a type or without message text are skipped.
    """
    alert_dicts = try_get(data, lambda x: x['alerts'], list) or []
    for alert_dict in alert_dicts:
        if isinstance(alert_dict, dict):
            for alert in alert_dict.values():
                alert_type = alert.get('type')
                if alert_type:
                    message = cls._get_text(alert, 'text')
                    if message:
                        yield alert_type, message

648

649

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

650

errors = []

651

warnings = []

652

for alert_type, alert_message in alerts:

653

if alert_type.lower() == 'error' and fatal:

654

errors.append([alert_type, alert_message])

655

else:

656

warnings.append([alert_type, alert_message])

657

658

for alert_type, alert_message in (warnings + errors[:-1]):

659

self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

660

if errors:

661

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

662

663

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and pass them to `_report_alerts` with the given options."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

665

666

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased `metadataBadgeRenderer` labels found in `renderer['badges']`."""
    badge_entries = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(entry, lambda x: x['metadataBadgeRenderer']['label'], str)
        for entry in badge_entries)
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found in `data` under any of the given paths.

    For each candidate object, `simpleText` is preferred; otherwise the `text`
    values of its `runs` (or of the object itself when it is a list) are joined.
    With no paths, `data` itself is the candidate.

    @param max_runs  when truthy, only this many leading runs are joined
    @returns the text, or None when nothing non-empty is found
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                # No `...`/list/tuple key in the path: treat the result as a single object
                candidates = [candidates]
        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text
            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None

def _get_count(self, data, *path_list):
    """Parse a count from the text found at the given paths.

    `parse_count` is tried first; if it yields None, the leading run of
    digits/commas (after removing all whitespace) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed
    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Collect thumbnail dicts (url, height, width) from `data`
    @param path_list: path list to level that contains 'thumbnails' key
    """
    collected = []
    for path in path_list or [()]:
        thumbnail_path = (*variadic(path), 'thumbnails', ...)
        for entry in traverse_obj(data, thumbnail_path, default=[]):
            image_url = url_or_none(entry.get('url'))
            if image_url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in image_url:
                    image_url = image_url.split('?')[0]
                collected.append({
                    'url': image_url,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return collected

@staticmethod

def extract_relative_time(relative_time_text):

730

"""

731

Extracts a relative time from string and converts to dt object

732

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

"""

if mobj:

start = mobj.group('start')

737

if start:

738

return datetime_from_str(start)

739

try:

740

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _extract_time_text(self, renderer, *path_list):
    """Read a time text from `renderer` and try to convert it to a timestamp.

    A relative time ('6 days ago') is tried first, then `unified_timestamp` on
    the whole text, then on a date-like fragment extracted from it. A warning is
    reported (once) when a non-empty text cannot be parsed.

    @returns (timestamp, time_text)
    """
    time_text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(time_text)
    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        timestamp = None

    if timestamp is None:
        timestamp = unified_timestamp(time_text)
        if not timestamp:
            date_patterns = (
                r'([a-z]+\s*\d{1,2},?\s*20\d{2})',
                r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)',
            )
            timestamp = unified_timestamp(
                self._search_regex(date_patterns, time_text.lower(), 'time text', default=None))

    if time_text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{time_text}'" + bug_reports_message(), only_once=True)
    return timestamp, time_text

762

763

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API with retries and return the response.

    A request is retried (up to the `extractor_retries` param, default 3) on
    network errors other than HTTP 403/429, on alerts containing 'unknown error',
    and when none of `check_get_keys` is present in the response.

    @param check_get_keys  keys of which at least one must be present in the
                           response for it to count as complete
    @returns the response; None when a non-retryable failure occurs and `fatal` is False
    """
    if check_get_keys is None:
        check_get_keys = []
    max_retries = self.get_param('extractor_retries', 3)
    response = None
    last_error = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as e:
            # Every path through this handler leaves the current iteration
            if isinstance(e.cause, network_exceptions):
                is_http_error = isinstance(e.cause, urllib.error.HTTPError)
                if is_http_error:
                    first_bytes = e.cause.read(512)
                    if not is_html(first_bytes):
                        error_body = self._webpage_read_content(
                            e.cause, None, item_id, prefix=first_bytes) or '{}'
                        yt_error = try_get(
                            self._parse_json(error_body, item_id, fatal=False),
                            lambda x: x['error']['message'], str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not is_http_error or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if attempt < max_retries:
                        continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                last_error = e.msg
                continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        if not check_get_keys or dict_get(response, check_get_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if attempt >= max_retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            return
    return response

@staticmethod

def is_music_url(url):

837

return re.match(r'https?://music\.youtube\.com/', url) is not None

838

839

def _extract_video(self, renderer):
    """Turn a video renderer dict into a `url`-type result for YoutubeIE.

    The result links to the watch page, or to the shorts page when the overlay
    style or navigation URL marks the video as a short. `upload_date` is only
    filled in when the `approximate_date` extractor argument is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration embedded in the accessibility label of the title
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    url = (f'https://www.youtube.com/shorts/{video_id}' if is_short
           else f'https://www.youtube.com/watch?v={video_id}')

    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')
    else:
        upload_date = None

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    availability = self._availability(
        needs_premium='premium' in badges, needs_subscription='members only' in badges)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

896

IE_DESC = 'YouTube'

897

_VALID_URL = r"""(?x)^

898

(

899

(?:https?://|//) # http(s):// or protocol-independent URL

900

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

901

(?:www\.)?deturl\.com/www\.youtube\.com|

902

(?:www\.)?pwnyoutube\.com|

903

(?:www\.)?hooktube\.com|

904

(?:www\.)?yourepeat\.com|

905

tube\.majestyc\.net|

906

%(invidious)s|

907

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

908

(?:.*?\#/)? # handle anchor (#/) redirect urls

909

(?: # the various things that can precede the ID:

910

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

911

|(?: # or the v= param in all its forms

912

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

913

(?:\?|\#!?) # the params delimiter ? or # or #!

914

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

920

vid\.plus| # or vid.plus/xxxx

921

zwearz\.com/watch| # or zwearz.com/watch/xxxx

922

%(invidious)s

923

)/

924

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

925

)

926

)? # all until now is optional -> you can pass the naked ID

927

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

928

(?(1).+)? # if we found the ID, everything can follow

929

(?:\#|$)""" % {

930

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

931

}

932

_PLAYER_INFO_RE = (

933

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

934

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

935

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

936

)

937

_formats = {

938

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

939

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

940

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

941

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

942

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

943

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

944

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

945

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

946

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

947

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

948

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

949

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

950

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

951

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

952

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

953

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

954

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

955

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

960

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

961

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

962

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

963

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

964

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

965

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

966

967

# Apple HTTP Live Streaming

968

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

969

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

970

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

971

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

972

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

973

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

974

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

975

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

976

977

# DASH mp4 video

978

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

979

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

980

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

981

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

982

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

983

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

984

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

985

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

986

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

987

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

988

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

989

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

990

991

# Dash mp4 audio

992

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

993

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

994

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

995

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

996

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

997

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

998

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

999

1000

# Dash webm

1001

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1002

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1003

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1004

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1005

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1006

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1007

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1008

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1009

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1010

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1011

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1012

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1013

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1014

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1015

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1016

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1017

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1018

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1019

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1020

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1021

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1022

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1023

1024

# Dash webm audio

1025

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1026

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1027

1028

# Dash webm audio with opus inside

1029

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1030

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1031

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1032

1033

# RTMP (unnamed)

1034

'_rtmp': {'protocol': 'rtmp'},

1035

1036

# av01 video only formats sometimes served with "unknown" codecs

1037

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1038

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1039

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1040

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1041

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1042

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1043

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1044

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1045

}

1046

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1058

'uploader': 'Philipp Hagemeister',

1059

'uploader_id': 'phihag',

1060

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1061

'channel': 'Philipp Hagemeister',

1062

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1063

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1064

'upload_date': '20121002',

1065

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1066

'categories': ['Science & Technology'],

1067

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1072

'playable_in_embed': True,

1073

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1074

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1079

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1084

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1089

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1090

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1091

'uploader': 'SET India',

1092

'uploader_id': 'setindia',

1093

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1094

'age_limit': 18,

1095

},

1096

'skip': 'Private video',

1097

},

1098

{

1099

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1100

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1105

'uploader': 'Philipp Hagemeister',

1106

'uploader_id': 'phihag',

1107

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1108

'channel': 'Philipp Hagemeister',

1109

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1110

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1111

'upload_date': '20121002',

1112

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1113

'categories': ['Science & Technology'],

1114

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1119

'playable_in_embed': True,

1120

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1121

'live_status': 'not_live',

1122

'age_limit': 0,

1123

'comment_count': int,

1124

'channel_follower_count': int

1125

},

1126

'params': {

1127

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1132

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1137

'uploader_id': '8KVIDEO',

1138

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1139

'description': '',

1140

'uploader': '8KVIDEO',

1141

'title': 'UHDTV TEST 8K VIDEO.mp4'

1142

},

1143

'params': {

1144

'youtube_include_dash_manifest': True,

1145

'format': '141',

1146

},

1147

'skip': 'format 141 not served anymore',

1148

},

1149

# DASH manifest with encrypted signature

1150

{

1151

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1156

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1157

'duration': 244,

1158

'uploader': 'AfrojackVEVO',

1159

'uploader_id': 'AfrojackVEVO',

1160

'upload_date': '20131011',

1161

'abr': 129.495,

1162

'like_count': int,

1163

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1164

'playable_in_embed': True,

1165

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1166

'view_count': int,

1167

'track': 'The Spark',

1168

'live_status': 'not_live',

1169

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1170

'channel': 'Afrojack',

1171

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1172

'tags': 'count:19',

1173

'availability': 'public',

1174

'categories': ['Music'],

1175

'age_limit': 0,

1176

'alt_title': 'The Spark',

1177

'channel_follower_count': int

1178

},

1179

'params': {

1180

'youtube_include_dash_manifest': True,

1181

'format': '141/bestaudio[ext=m4a]',

1182

},

1183

},

1184

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1185

{

1186

'note': 'Embed allowed age-gate video',

1187

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1192

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1193

'duration': 142,

1194

'uploader': 'The Witcher',

1195

'uploader_id': 'WitcherGame',

1196

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1197

'upload_date': '20140605',

1198

'age_limit': 18,

1199

'categories': ['Gaming'],

1200

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1201

'availability': 'needs_auth',

1202

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1203

'like_count': int,

1204

'channel': 'The Witcher',

1205

'live_status': 'not_live',

1206

'tags': 'count:17',

1207

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1208

'playable_in_embed': True,

1209

'view_count': int,

1210

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1215

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1220

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1221

'upload_date': '20200408',

1222

'uploader_id': 'FlyingKitty900',

1223

'uploader': 'FlyingKitty',

1224

'age_limit': 18,

1225

'availability': 'needs_auth',

1226

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1227

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1228

'channel': 'FlyingKitty',

1229

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1230

'view_count': int,

1231

'categories': ['Entertainment'],

1232

'live_status': 'not_live',

1233

'tags': ['Flyingkitty', 'godzilla 2'],

1234

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1235

'like_count': int,

1236

'duration': 177,

1237

'playable_in_embed': True,

1238

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1243

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1244

'info_dict': {

1245

'id': 'Tq92D6wQ1mg',

1246

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1247

'ext': 'mp4',

1248

'upload_date': '20191228',

1249

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1250

'uploader': 'Projekt Melody',

1251

'description': 'md5:17eccca93a786d51bc67646756894066',

1252

'age_limit': 18,

1253

'like_count': int,

1254

'availability': 'needs_auth',

1255

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1256

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1257

'view_count': int,

1258

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1259

'channel': 'Projekt Melody',

1260

'live_status': 'not_live',

1261

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1262

'playable_in_embed': True,

1263

'categories': ['Entertainment'],

1264

'duration': 106,

1265

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1266

'comment_count': int,

1267

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1272

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1277

'uploader': 'Herr Lurik',

1278

'uploader_id': 'st3in234',

1279

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1280

'upload_date': '20130730',

1281

'track': 'Such mich find mich',

1282

'age_limit': 0,

1283

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1284

'like_count': int,

1285

'playable_in_embed': False,

1286

'creator': 'OOMPH!',

1287

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1288

'view_count': int,

1289

'alt_title': 'Such mich find mich',

1290

'duration': 210,

1291

'channel': 'Herr Lurik',

1292

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1293

'categories': ['Music'],

1294

'availability': 'public',

1295

'uploader_url': 'http://www.youtube.com/user/st3in234',

1296

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1297

'live_status': 'not_live',

1298

'artist': 'OOMPH!',

1299

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1304

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1305

'only_matching': True,

1306

},

1307

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1308

# YouTube Red ad is not captured for creator

1309

{

1310

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1316

'uploader_id': 'deadmau5',

1317

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1318

'creator': 'deadmau5',

1319

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1320

'uploader': 'deadmau5',

1321

'title': 'Deadmau5 - Some Chords (HD)',

1322

'alt_title': 'Some Chords',

1323

'availability': 'public',

1324

'tags': 'count:14',

1325

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1326

'view_count': int,

1327

'live_status': 'not_live',

1328

'channel': 'deadmau5',

1329

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1330

'like_count': int,

1331

'track': 'Some Chords',

1332

'artist': 'deadmau5',

1333

'playable_in_embed': True,

1334

'age_limit': 0,

1335

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1336

'categories': ['Music'],

1337

'album': 'Some Chords',

1338

'channel_follower_count': int

1339

},

1340

'expected_warnings': [

1341

'DASH manifest missing',

1342

]

1343

},

1344

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1345

{

1346

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1352

'uploader_id': 'olympic',

1353

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1354

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1355

'uploader': 'Olympics',

1356

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1357

'like_count': int,

1358

'release_timestamp': 1343767800,

1359

'playable_in_embed': True,

1360

'categories': ['Sports'],

1361

'release_date': '20120731',

1362

'channel': 'Olympics',

1363

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1364

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1365

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1366

'age_limit': 0,

1367

'availability': 'public',

1368

'live_status': 'was_live',

1369

'view_count': int,

1370

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1371

'channel_follower_count': int

1372

},

1373

'params': {

1374

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1384

'duration': 85,

1385

'upload_date': '20110310',

1386

'uploader_id': 'AllenMeow',

1387

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1388

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1389

'uploader': '孫ᄋᄅ',

1390

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1391

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1396

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1397

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1398

'view_count': int,

1399

'categories': ['People & Blogs'],

1400

'like_count': int,

1401

'live_status': 'not_live',

1402

'availability': 'unlisted',

1403

'comment_count': int,

1404

'channel_follower_count': int

1405

},

1406

},

1407

# url_encoded_fmt_stream_map is empty string

1408

{

1409

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1414

'description': '',

1415

'upload_date': '20150404',

1416

'uploader_id': 'spbelect',

1417

'uploader': 'Наблюдатели Петербурга',

1418

},

1419

'params': {

1420

'skip_download': 'requires avconv',

1421

},

1422

'skip': 'This live event has ended.',

1423

},

1424

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1425

{

1426

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1431

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1432

'duration': 220,

1433

'upload_date': '20150625',

1434

'uploader_id': 'dorappi2000',

1435

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1436

'uploader': 'dorappi2000',

1437

'formats': 'mincount:31',

1438

},

1439

'skip': 'not actual anymore',

1440

},

1441

# DASH manifest with segment_list

1442

{

1443

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1444

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1449

'uploader': 'Airtek',

1450

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1451

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1452

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1453

},

1454

'params': {

1455

'youtube_include_dash_manifest': True,

1456

'format': '135', # bestvideo

1457

},

1458

'skip': 'This live event has ended.',

1459

},

1460

{

1461

# Multifeed videos (multiple cameras), URL is for Main Camera

1462

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1463

'info_dict': {

1464

'id': 'jvGDaLqkpTg',

1465

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1466

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1473

'description': 'md5:e03b909557865076822aa169218d6a5d',

1474

'duration': 10643,

1475

'upload_date': '20161111',

1476

'uploader': 'Team PGP',

1477

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1478

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1485

'description': 'md5:e03b909557865076822aa169218d6a5d',

1486

'duration': 10991,

1487

'upload_date': '20161111',

1488

'uploader': 'Team PGP',

1489

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1490

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1497

'description': 'md5:e03b909557865076822aa169218d6a5d',

1498

'duration': 10995,

1499

'upload_date': '20161111',

1500

'uploader': 'Team PGP',

1501

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1502

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1509

'description': 'md5:e03b909557865076822aa169218d6a5d',

1510

'duration': 10990,

1511

'upload_date': '20161111',

1512

'uploader': 'Team PGP',

1513

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1514

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1519

},

1520

'skip': 'Not multifeed anymore',

1521

},

1522

{

1523

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1524

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1525

'info_dict': {

1526

'id': 'gVfLd0zydlo',

1527

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1528

},

1529

'playlist_count': 2,

1530

'skip': 'Not multifeed anymore',

1531

},

1532

{

1533

'url': 'https://vid.plus/FlRa-iH7PGw',

1534

'only_matching': True,

1535

},

1536

{

1537

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1538

'only_matching': True,

1539

},

1540

{

1541

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1542

# Also tests cut-off URL expansion in video description (see

1543

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1544

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1545

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1550

'alt_title': 'Dark Walk',

1551

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1552

'duration': 133,

1553

'upload_date': '20151119',

1554

'uploader_id': 'IronSoulElf',

1555

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1556

'uploader': 'IronSoulElf',

1557

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1558

'track': 'Dark Walk',

1559

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1560

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1561

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1562

'categories': ['Film & Animation'],

1563

'view_count': int,

1564

'live_status': 'not_live',

1565

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1566

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1567

'tags': 'count:13',

1568

'availability': 'public',

1569

'channel': 'IronSoulElf',

1570

'playable_in_embed': True,

1571

'like_count': int,

1572

'age_limit': 0,

1573

'channel_follower_count': int

1574

},

1575

'params': {

1576

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1581

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1582

'only_matching': True,

1583

},

1584

{

1585

# Video with yt:stretch=17:0

1586

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1591

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1592

'upload_date': '20151107',

1593

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1594

'uploader': 'CH GAMER DROID',

1595

},

1596

'params': {

1597

'skip_download': True,

1598

},

1599

'skip': 'This video does not exist.',

1600

},

1601

{

1602

# Video with incomplete 'yt:stretch=16:'

1603

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1604

'only_matching': True,

1605

},

1606

{

1607

# Video licensed under Creative Commons

1608

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1613

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1614

'duration': 721,

1615

'upload_date': '20150128',

1616

'uploader_id': 'BerkmanCenter',

1617

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1618

'uploader': 'The Berkman Klein Center for Internet & Society',

1619

'license': 'Creative Commons Attribution license (reuse allowed)',

1620

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1621

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1622

'like_count': int,

1623

'age_limit': 0,

1624

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1625

'channel': 'The Berkman Klein Center for Internet & Society',

1626

'availability': 'public',

1627

'view_count': int,

1628

'categories': ['Education'],

1629

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1630

'live_status': 'not_live',

1631

'playable_in_embed': True,

1632

'comment_count': int,

1633

'channel_follower_count': int

1634

},

1635

'params': {

1636

'skip_download': True,

},

},

{

# Channel-like uploader_url

1641

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1646

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1647

'duration': 4060,

1648

'upload_date': '20151120',

1649

'uploader': 'Bernie Sanders',

1650

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1651

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1652

'license': 'Creative Commons Attribution license (reuse allowed)',

1653

'playable_in_embed': True,

1654

'tags': 'count:12',

1655

'like_count': int,

1656

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1657

'age_limit': 0,

1658

'availability': 'public',

1659

'categories': ['News & Politics'],

1660

'channel': 'Bernie Sanders',

1661

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1662

'view_count': int,

1663

'live_status': 'not_live',

1664

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1665

'comment_count': int,

1666

'channel_follower_count': int

1667

},

1668

'params': {

1669

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1674

'only_matching': True,

1675

},

1676

{

1677

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1678

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1679

'only_matching': True,

1680

},

1681

{

1682

# Rental video preview

1683

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1688

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1689

'upload_date': '20150811',

1690

'uploader': 'FlixMatrix',

1691

'uploader_id': 'FlixMatrixKaravan',

1692

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1693

'license': 'Standard YouTube License',

1694

},

1695

'params': {

1696

'skip_download': True,

1697

},

1698

'skip': 'This video is not available.',

1699

},

1700

{

1701

# YouTube Red video with episode data

1702

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1707

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1708

'duration': 2085,

1709

'upload_date': '20170118',

1710

'uploader': 'Vsauce',

1711

'uploader_id': 'Vsauce',

1712

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1713

'series': 'Mind Field',

1714

'season_number': 1,

1715

'episode_number': 1,

1716

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1717

'tags': 'count:12',

1718

'view_count': int,

1719

'availability': 'public',

1720

'age_limit': 0,

1721

'channel': 'Vsauce',

1722

'episode': 'Episode 1',

1723

'categories': ['Entertainment'],

1724

'season': 'Season 1',

1725

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1726

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1727

'like_count': int,

1728

'playable_in_embed': True,

1729

'live_status': 'not_live',

1730

'channel_follower_count': int

1731

},

1732

'params': {

1733

'skip_download': True,

1734

},

1735

'expected_warnings': [

1736

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1741

# as inappropriate or offensive to some audiences.

1742

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1747

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1748

'duration': 965,

1749

'upload_date': '20140124',

1750

'uploader': 'New Century Foundation',

1751

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1752

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1753

},

1754

'params': {

1755

'skip_download': True,

1756

},

1757

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1762

'only_matching': True,

1763

},

1764

{

1765

# geo restricted to JP

1766

'url': 'sJL6WA-aGkQ',

1767

'only_matching': True,

1768

},

1769

{

1770

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1771

'only_matching': True,

1772

},

1773

{

1774

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1775

'only_matching': True,

1776

},

1777

{

1778

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1779

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1780

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1785

'only_matching': True,

1786

},

1787

{

1788

# Video with unsupported adaptive stream type formats

1789

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1794

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1795

'duration': 433,

1796

'upload_date': '20130923',

1797

'uploader': 'Amelia Putri Harwita',

1798

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1799

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1800

'formats': 'maxcount:10',

1801

},

1802

'params': {

1803

'skip_download': True,

1804

'youtube_include_dash_manifest': False,

1805

},

1806

'skip': 'not actual anymore',

1807

},

1808

{

1809

# YouTube Music Auto-generated description

1810

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1815

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1816

'upload_date': '20190312',

1817

'uploader': 'Stephen - Topic',

1818

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1819

'artist': 'Stephen',

1820

'track': 'Voyeur Girl',

1821

'album': 'it\'s too much love to know my dear',

1822

'release_date': '20190313',

1823

'release_year': 2019,

1824

'alt_title': 'Voyeur Girl',

1825

'view_count': int,

1826

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1827

'playable_in_embed': True,

1828

'like_count': int,

1829

'categories': ['Music'],

1830

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1831

'channel': 'Stephen',

1832

'availability': 'public',

1833

'creator': 'Stephen',

1834

'duration': 169,

1835

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1836

'age_limit': 0,

1837

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1838

'tags': 'count:11',

1839

'live_status': 'not_live',

1840

'channel_follower_count': int

1841

},

1842

'params': {

1843

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1848

'only_matching': True,

1849

},

1850

{

1851

# invalid -> valid video id redirection

1852

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1857

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1858

'upload_date': '20090125',

1859

'uploader': 'Prochorowka',

1860

'uploader_id': 'Prochorowka',

1861

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1862

'artist': 'Panjabi MC',

1863

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1864

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1865

},

1866

'params': {

1867

'skip_download': True,

1868

},

1869

'skip': 'Video unavailable',

1870

},

1871

{

1872

# empty description results in an empty string

1873

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1880

'uploader_id': 'ElevageOrVert',

1881

'uploader': 'ElevageOrVert',

1882

'view_count': int,

1883

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1884

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1885

'like_count': int,

1886

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1887

'tags': [],

1888

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1889

'availability': 'public',

1890

'age_limit': 0,

1891

'categories': ['Pets & Animals'],

1892

'duration': 7,

1893

'playable_in_embed': True,

1894

'live_status': 'not_live',

1895

'channel': 'ElevageOrVert',

1896

'channel_follower_count': int

1897

},

1898

'params': {

1899

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1904

# see [2] for an example with '};' inside ytInitialPlayerResponse

1905

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1906

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1907

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1912

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1913

'upload_date': '20130831',

1914

'uploader_id': 'kudvenkat',

1915

'uploader': 'kudvenkat',

1916

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1917

'like_count': int,

1918

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1919

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1920

'live_status': 'not_live',

1921

'categories': ['Education'],

1922

'availability': 'public',

1923

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1924

'tags': 'count:12',

1925

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1930

'comment_count': int,

1931

'channel_follower_count': int

1932

},

1933

'params': {

1934

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1939

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1940

'only_matching': True,

1941

},

1942

{

1943

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1944

'only_matching': True,

1945

},

1946

{

1947

# https://github.com/ytdl-org/youtube-dl/pull/28094

1948

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1954

'upload_date': '20141120',

1955

'uploader': 'The Cinematic Orchestra - Topic',

1956

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1957

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1958

'artist': 'The Cinematic Orchestra',

1959

'track': 'Burn Out',

1960

'album': 'Every Day',

1961

'like_count': int,

1962

'live_status': 'not_live',

1963

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1968

'creator': 'The Cinematic Orchestra',

1969

'channel': 'The Cinematic Orchestra',

1970

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1971

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1972

'availability': 'public',

1973

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1974

'categories': ['Music'],

1975

'playable_in_embed': True,

1976

'channel_follower_count': int

1977

},

1978

'params': {

1979

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1984

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1985

'only_matching': True,

1986

},

1987

{

1988

# controversial video, requires bpctr/contentCheckOk

1989

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1994

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1995

'uploader': 'CBS Mornings',

1996

'uploader_id': 'CBSThisMorning',

1997

'upload_date': '20140716',

1998

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1999

'duration': 170,

2000

'categories': ['News & Politics'],

2001

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2002

'view_count': int,

2003

'channel': 'CBS Mornings',

2004

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2005

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2006

'age_limit': 18,

2007

'availability': 'needs_auth',

2008

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2009

'like_count': int,

2010

'live_status': 'not_live',

2011

'playable_in_embed': True,

2012

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2017

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2022

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2023

'upload_date': '20201120',

2024

'uploader': 'Walk around Japan',

2025

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2026

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2027

'duration': 1456,

2028

'categories': ['Travel & Events'],

2029

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2030

'view_count': int,

2031

'channel': 'Walk around Japan',

2032

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2033

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2034

'age_limit': 0,

2035

'availability': 'public',

2036

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2037

'live_status': 'not_live',

2038

'playable_in_embed': True,

2039

'channel_follower_count': int

2040

},

2041

'params': {

2042

'skip_download': True,

2043

},

2044

}, {

2045

# Has multiple audio streams

2046

'url': 'WaOKSUlf4TM',

2047

'only_matching': True

2048

}, {

2049

# Requires Premium: has format 141 when requested using YTM url

2050

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2051

'only_matching': True

2052

}, {

2053

# multiple subtitles with same lang_code

2054

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2055

'only_matching': True,

2056

}, {

2057

# Force use android client fallback

2058

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2059

'info_dict': {

2060

'id': 'YOelRv7fMxY',

2061

'title': 'DIGGING A SECRET TUNNEL Part 1',

2062

'ext': '3gp',

2063

'upload_date': '20210624',

2064

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2065

'uploader': 'colinfurze',

2066

'uploader_id': 'colinfurze',

2067

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2068

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2069

'duration': 596,

2070

'categories': ['Entertainment'],

2071

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2072

'view_count': int,

2073

'channel': 'colinfurze',

2074

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2075

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2076

'age_limit': 0,

2077

'availability': 'public',

2078

'like_count': int,

2079

'live_status': 'not_live',

2080

'playable_in_embed': True,

2081

'channel_follower_count': int

2082

},

2083

'params': {

2084

'format': '17', # 3gp format available on android

2085

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2090

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2091

'only_matching': True,

2092

'params': {

2093

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2098

'only_matching': True,

2099

}, {

2100

'note': 'Storyboards',

2101

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2107

'uploader_id': 'scishow',

2108

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2109

'upload_date': '20140324',

2110

'uploader': 'SciShow',

2111

'like_count': int,

2112

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2113

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2114

'view_count': int,

2115

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2116

'playable_in_embed': True,

2117

'tags': 'count:12',

2118

'uploader_url': 'http://www.youtube.com/user/scishow',

2119

'availability': 'public',

2120

'channel': 'SciShow',

2121

'live_status': 'not_live',

2122

'duration': 248,

2123

'categories': ['Education'],

2124

'age_limit': 0,

2125

'channel_follower_count': int

2126

}, 'params': {'format': 'mhtml', 'skip_download': True}

2127

}, {

2128

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2129

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2134

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2135

'uploader': 'Leon Nguyen',

2136

'uploader_id': 'VNSXIII',

2137

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2138

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2139

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2144

'tags': 'count:23',

2145

'playable_in_embed': True,

2146

'live_status': 'not_live',

2147

'upload_date': '20220103',

2148

'like_count': int,

2149

'availability': 'public',

2150

'channel': 'Leon Nguyen',

2151

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2152

'comment_count': int,

2153

'channel_follower_count': int

2154

}

2155

}, {

2156

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2157

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2162

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2163

'uploader': 'Quackity',

2164

'uploader_id': 'QuackityHQ',

2165

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2166

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2167

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2172

'tags': 'count:26',

2173

'playable_in_embed': True,

2174

'live_status': 'not_live',

2175

'release_timestamp': 1641172509,

2176

'release_date': '20220103',

2177

'upload_date': '20220103',

2178

'like_count': int,

2179

'availability': 'public',

2180

'channel': 'Quackity',

2181

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2182

'channel_follower_count': int

2183

}

2184

},

2185

{ # continuous livestream. Microformat upload date should be preferred.

2186

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2187

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2188

'info_dict': {

2189

'id': 'kgx4WGK0oNU',

2190

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2191

'ext': 'mp4',

2192

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2193

'availability': 'public',

2194

'age_limit': 0,

2195

'release_timestamp': 1637975704,

2196

'upload_date': '20210619',

2197

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2198

'live_status': 'is_live',

2199

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2200

'uploader': '阿鲍Abao',

2201

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2202

'channel': 'Abao in Tokyo',

2203

'channel_follower_count': int,

2204

'release_date': '20211127',

2205

'tags': 'count:39',

2206

'categories': ['People & Blogs'],

2207

'like_count': int,

2208

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2209

'view_count': int,

2210

'playable_in_embed': True,

2211

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2212

},

2213

'params': {'skip_download': True}

2214

}, {

2215

# Story. Requires specific player params to work.

2216

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2221

'view_count': int,

2222

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2223

'upload_date': '20220526',

2224

'categories': ['Education'],

2225

'title': 'Story',

2226

'channel': 'IT\'S HISTORY',

2227

'description': '',

2228

'uploader_id': 'BlastfromthePast',

2229

'duration': 12,

2230

'uploader': 'IT\'S HISTORY',

2231

'playable_in_embed': True,

2232

'age_limit': 0,

2233

'live_status': 'not_live',

2234

'tags': [],

2235

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2236

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2237

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2238

},

2239

'skip': 'stories get removed after some period of time',

2240

}, {

2241

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2246

'upload_date': '20220323',

2247

'like_count': int,

2248

'availability': 'unlisted',

2249

'channel': 'nao20010128nao',

2250

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2251

'age_limit': 0,

2252

'uploader': 'nao20010128nao',

2253

'uploader_id': 'nao20010128nao',

2254

'categories': ['Music'],

2255

'view_count': int,

2256

'description': '',

2257

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2258

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2259

'live_status': 'not_live',

2260

'playable_in_embed': True,

2261

'channel_follower_count': int,

2262

'duration': 6,

2263

'tags': [],

2264

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

}

}

]

@classmethod
def suitable(cls, url):
    """Reject URLs that carry a non-empty ``list`` query value; otherwise defer to the parent check."""
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        # A `list` value is present, so this extractor declines the URL
        return False
    return super().suitable(url)

2277

2278

def __init__(self, *args, **kwargs):
    """Initialise the parent class and create the two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player code (filled by _load_player)
    # _player_cache: cached signature/nsig functions and decrypted nsig values
    self._code_cache, self._player_cache = {}, {}

2282

2283

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):

2284

lock = threading.Lock()

2285

2286

is_live = True

2287

start_time = time.time()

2288

formats = [f for f in formats if f.get('is_from_start')]

2289

2290

def refetch_manifest(format_id, delay):

2291

nonlocal formats, start_time, is_live

2292

if time.time() <= start_time + delay:

2293

return

2294

2295

_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

2296

video_details = traverse_obj(

2297

prs, (..., 'videoDetails'), expected_type=dict, default=[])

2298

microformats = traverse_obj(

2299

prs, (..., 'microformat', 'playerMicroformatRenderer'),

2300

expected_type=dict, default=[])

2301

_, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)

2302

start_time = time.time()

2303

2304

def mpd_feed(format_id, delay):

2305

"""

2306

@returns (manifest_url, manifest_stream_number, is_live) or None

2307

"""

2308

with lock:

2309

refetch_manifest(format_id, delay)

2310

2311

f = next((f for f in formats if f['format_id'] == format_id), None)

2312

if not f:

2313

if not is_live:

2314

self.to_screen(f'{video_id}: Video is no longer live')

2315

else:

2316

self.report_warning(

2317

f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')

2318

return None

2319

return f['manifest_url'], f['manifest_stream_number'], is_live

for f in formats:

f['is_live'] = True

f['protocol'] = 'http_dash_segments_generator'

2324

f['fragments'] = functools.partial(

2325

self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)

2326

2327

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):

2328

FETCH_SPAN, MAX_DURATION = 5, 432000

2329

2330

mpd_url, stream_number, is_live = None, None, True

2331

2332

begin_index = 0

2333

download_start_time = ctx.get('start') or time.time()

2334

2335

lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION

2336

if lack_early_segments:

2337

self.report_warning(bug_reports_message(

2338

'Starting download from the last 120 hours of the live stream since '

2339

'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

2340

lack_early_segments = True

2341

2342

known_idx, no_fragment_score, last_segment_url = begin_index, 0, None

2343

fragments, fragment_base_url = None, None

2344

2345

def _extract_sequence_from_mpd(refresh_sequence, immediate):

2346

nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url

2347

# Obtain from MPD's maximum seq value

2348

old_mpd_url = mpd_url

2349

last_error = ctx.pop('last_error', None)

2350

expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403

2351

mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)

2352

or (mpd_url, stream_number, False))

2353

if not refresh_sequence:

2354

if expire_fast and not is_live:

2355

return False, last_seq

2356

elif old_mpd_url == mpd_url:

2357

return True, last_seq

2358

try:

2359

fmts, _ = self._extract_mpd_formats_and_subtitles(

2360

mpd_url, None, note=False, errnote=False, fatal=False)

2361

except ExtractorError:

2362

fmts = None

2363

if not fmts:

2364

no_fragment_score += 2

2365

return False, last_seq

2366

fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)

2367

fragments = fmt_info['fragments']

2368

fragment_base_url = fmt_info['fragment_base_url']

2369

assert fragment_base_url

2370

2371

_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))

2372

return True, _last_seq

2373

2374

while is_live:

2375

fetch_time = time.time()

2376

if no_fragment_score > 30:

2377

return

2378

if last_segment_url:

2379

# Obtain from "X-Head-Seqnum" header value from each segment

2380

try:

2381

urlh = self._request_webpage(

2382

last_segment_url, None, note=False, errnote=False, fatal=False)

2383

except ExtractorError:

2384

urlh = None

2385

last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))

2386

if last_seq is None:

2387

no_fragment_score += 2

2388

last_segment_url = None

2389

continue

2390

else:

2391

should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)

2392

no_fragment_score += 2

2393

if not should_continue:

2394

continue

2395

2396

if known_idx > last_seq:

2397

last_segment_url = None

continue

last_seq += 1

if begin_index < 0 and known_idx < 0:

2403

# skip from the start when it's negative value

2404

known_idx = last_seq + begin_index

2405

if lack_early_segments:

2406

known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))

2407

try:

2408

for idx in range(known_idx, last_seq):

2409

# do not update sequence here or you'll get skipped some part of it

2410

should_continue, _ = _extract_sequence_from_mpd(False, False)

2411

if not should_continue:

2412

known_idx = idx - 1

2413

raise ExtractorError('breaking out of outer loop')

2414

last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)

2415

yield {

2416

'url': last_segment_url,

2417

'fragment_count': last_seq,

2418

}

2419

if known_idx == last_seq:

2420

no_fragment_score += 5

2421

else:

2422

no_fragment_score = 0

2423

known_idx = last_seq

2424

except ExtractorError:

2425

continue

2426

2427

time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2428

2429

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfgs, or None.

    Looks up ``PLAYER_JS_URL`` first, then ``WEB_PLAYER_CONTEXT_CONFIGS/*/jsUrl``.
    The ``webpage`` argument is accepted but not used here.
    """
    candidate = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', candidate) if candidate else None

2436

2437

def _download_player_url(self, video_id, fatal=False):

2438

res = self._download_webpage(

2439

'https://www.youtube.com/iframe_api',

2440

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2441

if res:

2442

player_version = self._search_regex(

2443

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2444

if player_version:

2445

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2446

2447

def _signature_cache_id(self, example_sig):

2448

""" Return a string representation of a signature """

2449

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2450

2451

@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern matching the URL.

    Raises ExtractorError when none of the patterns match.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match is not None:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)

2460

2461

def _load_player(self, video_id, player_url, fatal=True):

2462

player_id = self._extract_player_info(player_url)

2463

if player_id not in self._code_cache:

2464

code = self._download_webpage(

2465

player_url, video_id, fatal=fatal,

2466

note='Downloading player ' + player_id,

2467

errnote='Download of %s failed' % player_url)

2468

if code:

2469

self._code_cache[player_id] = code

2470

return self._code_cache.get(player_id)

2471

2472

def _extract_signature_function(self, video_id, player_url, example_sig):

2473

player_id = self._extract_player_info(player_url)

2474

2475

# Read from filesystem cache

2476

func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'

2477

assert os.path.basename(func_id) == func_id

2478

2479

self.write_debug(f'Extracting signature function {func_id}')

2480

cache_spec = self.cache.load('youtube-sigfuncs', func_id)

2481

if cache_spec is not None:

2482

return lambda s: ''.join(s[i] for i in cache_spec)

2483

2484

code = self._load_player(video_id, player_url)

2485

if code:

2486

res = self._parse_sig_js(code)

2487

2488

test_string = ''.join(map(chr, range(len(example_sig))))

2489

cache_res = res(test_string)

2490

cache_spec = [ord(c) for c in cache_res]

2491

2492

self.cache.store('youtube-sigfuncs', func_id, cache_spec)

2493

return res

2494

2495

def _print_sig_code(self, func, example_sig):

2496

if not self.get_param('youtube_print_sig_code'):

2497

return

2498

2499

def gen_sig_code(idxs):

2500

def _genslice(start, end, step):

2501

starts = '' if start == 0 else str(start)

2502

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2503

steps = '' if step == 1 else (':%d' % step)

2504

return f's[{starts}{ends}{steps}]'

2505

2506

step = None

2507

# Quelch pyflakes warnings - start will be set when step is set

2508

start = '(Never used)'

2509

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2514

step = None

2515

continue

2516

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2526

2527

test_string = ''.join(map(chr, range(len(example_sig))))

2528

cache_res = func(test_string)

2529

cache_spec = [ord(c) for c in cache_res]

2530

expr_code = ' + '.join(gen_sig_code(cache_spec))

2531

signature_id_tuple = '(%s)' % (

2532

', '.join(str(len(p)) for p in example_sig.split('.')))

2533

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2534

' return %s\n') % (signature_id_tuple, expr_code)

2535

self.to_screen('Extracted signature function:\n' + code)

2536

2537

def _parse_sig_js(self, jscode):

2538

funcname = self._search_regex(

2539

(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2540

r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2541

r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})$decodeURIComponent\(h\.s$\)',

2542

r'\bc&&$c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c$\)',

2543

r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function$\s*a\s*$\s*{\s*a\s*=\s*a\.split$\s*""\s*$;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}$a,\d+$',

2544

r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function$\s*a\s*$\s*{\s*a\s*=\s*a\.split$\s*""\s*$',

2545

r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function$\s*a\s*$\s*{\s*a\s*=\s*a\.split$\s*""\s*$',

2546

# Obsolete patterns

2547

r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2548

r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',

2549

r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2550

r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2551

r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2552

r'\bc\s*&&\s*a\.set$[^,]+\s*,\s*\([^)]*$\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2553

r'\bc\s*&&\s*[a-zA-Z0-9]+\.set$[^,]+\s*,\s*\([^)]*$\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2554

r'\bc\s*&&\s*[a-zA-Z0-9]+\.set$[^,]+\s*,\s*\([^)]*$\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),

2555

jscode, 'Initial JS player signature function name', group='sig')

2556

2557

jsi = JSInterpreter(jscode)

2558

initial_function = jsi.extract_function(funcname)

2559

return lambda s: initial_function([s])

2560

2561

def _decrypt_signature(self, s, video_id, player_url):

2562

"""Turn the encrypted s field into a working signature"""

2563

try:

2564

player_id = (player_url, self._signature_cache_id(s))

2565

if player_id not in self._player_cache:

2566

func = self._extract_signature_function(video_id, player_url, s)

2567

self._player_cache[player_id] = func

2568

func = self._player_cache[player_id]

2569

self._print_sig_code(func, s)

2570

return func(s)

2571

except Exception as e:

2572

raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2573

2574

def _decrypt_nsig(self, s, video_id, player_url):
    """Transform the encrypted ``n`` value using the player's n function.

    Both the extracted function (per player URL) and every decrypted value
    are memoised in ``self._player_cache``. Raises ExtractorError when
    ``player_url`` is None or when extraction/decryption fails.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    result_key = ('nsig_value', s)
    if result_key in self._player_cache:
        return self._player_cache[result_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        n_func = self._player_cache[func_key]
        decrypted = self._player_cache[result_key] = n_func(s)
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2594

2595

def _extract_n_function_name(self, jscode):

2596

nfunc, idx = self._search_regex(

2597

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

2598

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

2599

if not idx:

2600

return nfunc

2601

return json.loads(js_to_json(self._search_regex(

2602

rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,

2603

f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2604

2605

def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's n function to a string.

    The function code is read from the ``youtube-nsig`` cache when present;
    otherwise it is extracted from the downloaded player and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def decrypt(s):
        # The JS function object is rebuilt from the code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return decrypt

2622

2623

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):

2624

"""

2625

Extract signatureTimestamp (sts)

2626

Required to tell API what sig/player version is in use.

2627

"""

2628

sts = None

2629

if isinstance(ytcfg, dict):

2630

sts = int_or_none(ytcfg.get('STS'))

2631

2632

if not sts:

2633

# Attempt to extract from player

2634

if player_url is None:

2635

error_msg = 'Cannot extract signature timestamp without player_url.'

2636

if fatal:

2637

raise ExtractorError(error_msg)

2638

self.report_warning(error_msg)

2639

return

2640

code = self._load_player(video_id, player_url, fatal=fatal)

2641

if code:

2642

sts = int_or_none(self._search_regex(

2643

r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,

2644

'JS player signature timestamp', group='sts', fatal=fatal))

2645

return sts

2646

2647

def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URLs so that the video gets marked as watched.

    The ``videostatsPlaybackUrl`` is requested first, then the
    ``videostatsWatchtimeUrl`` ("fully watched"). Processing stops with a
    warning as soon as one of the URLs is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        tracking_url = get_first(
            player_responses, ('playbackTracking', key, 'baseUrl'), expected_type=url_or_none)
        if not tracking_url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(tracking_url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        reported_len = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_len) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = video_length
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        request_url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            request_url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

2687

2688

@staticmethod
def _extract_urls(webpage):
    """Collect embedded YouTube player URLs / video ids found in ``webpage``.

    Three sources are scanned, in this order: generic embed markup
    (iframe/embed/object/SWFObject/data-video-url), lazyYT ``data-youtube-id``
    attributes, and the WordPress "YouTube Video Importer" plugin markup.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = []
    for mobj in re.finditer(embed_re, webpage):
        entries.append(unescapeHTML(mobj.group('url')))

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(list(map(unescapeHTML, lazy_ids)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields one tuple per match; the last group holds the video id
    entries.extend(groups[-1] for groups in matches)

    return entries

@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``_extract_urls``, or None when there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]

2724

2725

@classmethod
def extract_id(cls, url):
    """Return the id obtained via ``get_temp_id``; raise ExtractorError if it is empty."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

2731

2732

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in ``data``.

    Start times are read from ``timeRangeStartMillis`` (scaled by 1000) and
    titles from ``title.simpleText`` of each ``chapterRenderer``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_of(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_of, chapter_title=title_of, duration=duration)

2746

2747

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first engagement panel that yields any, else ``[]``.

    Each panel's ``macroMarkersListRenderer`` contents are tried in order;
    times come from ``timeDescription`` and titles from ``title``.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2759

2760

def _extract_chapters_from_description(self, description, duration):

2761

return self._extract_chapters(

2762

re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),

2763

chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],

2764

duration=duration, strict=False)

2765

2766

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

2771

'title': chapter_title(chapter),

2772

} for chapter in chapter_list or []]

2773

if not strict:

2774

chapter_list.sort(key=lambda c: c['start_time'] or 0)

2775

2776

chapters = [{'start_time': 0}]

2777

for idx, chapter in enumerate(chapter_list):

2778

if chapter['start_time'] is None:

2779

self.report_warning(f'Incomplete chapter {idx}')

2780

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

2781

chapters.append(chapter)

2782

else:

2783

self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')

2784

return chapters[1:]

2785

2786

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a flat comment info dict.

    Returns None when the renderer has no 'commentId'. `parent` is the id of
    the parent comment; when it is falsy the comment is reported with
    parent 'root'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    body = self._get_text(comment_renderer, 'contentText')
    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author_name = self._get_text(comment_renderer, 'authorText')

    author_channel_id = try_get(
        comment_renderer, lambda r: r['authorEndpoint']['browseEndpoint']['browseId'], str)
    # The like count is looked up as voteCount.simpleText first, then as likeCount
    raw_votes = try_get(
        comment_renderer,
        (lambda r: r['voteCount']['simpleText'], lambda r: r['likeCount']),
        str)
    avatar_url = try_get(
        comment_renderer, lambda r: r['authorThumbnail']['thumbnails'][-1]['url'], str)
    by_uploader = try_get(comment_renderer, lambda r: r['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda r: r['actionButtons']['commentActionButtonsRenderer'], dict) or {}

    return {
        'id': comment_id,
        'text': body,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': parse_count(raw_votes) or 0,
        'is_favorited': 'creatorHeart' in action_buttons,
        'author': author_name,
        'author_id': author_channel_id,
        'author_thumbnail': avatar_url,
        'author_is_uploader': by_uploader,
        'parent': parent or 'root',
    }

2820

2821

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator over the comments reachable from `root_continuation_data`.

    Pages through the 'next' endpoint following continuations. For the top
    level (parent is None) the first response is only used to read the
    comments header and pick the sort continuation. Replies are fetched by
    recursing with `parent` set to the comment id and the same `tracker`
    dict, which carries the running counters across the recursive calls.
    Limits are read from the 'max_comments' extractor argument.
    """

    def first_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Return the continuation for the selected sort order (or a falsy value)"""
        found = None
        for content in contents:
            header = traverse_obj(content, 'commentsHeaderRenderer')
            estimated_count = self._get_count(header, 'countText', 'commentsCount')
            if estimated_count:
                tracker['est_total'] = estimated_count
                self.to_screen(f'Downloading ~{estimated_count} comments')

            # 1 = new, 0 = top
            sort_index = int(first_config_arg('comment_sort') != 'top')
            menu_item = try_get(
                header,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][sort_index], dict) or {}
            endpoint = menu_item.get('serviceEndpoint') or {}

            found = self._extract_continuation_ep_data(endpoint) or self._extract_continuation(menu_item)
            if found:
                sort_label = str_or_none(menu_item.get('title'))
                if not sort_label:
                    sort_label = 'top comments' if sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_label.lower())
                break
        return found

    def extract_thread(contents):
        """Yield the comments (and their replies) of one page of continuation items"""
        is_top_level = not parent
        counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
        if is_top_level:
            tracker['current_page_thread'] = 0
        for content in contents:
            if is_top_level and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) is the signal for the consumer below to stop
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            reply_entries = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            reply_budget = min(
                max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))
            yield from itertools.islice(reply_entries, reply_budget)

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(first_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # The argument is padded so that missing positions fall back to "no limit".
    # Position 0 (the overall maximum) is not used by this method.
    limits = [
        int_or_none(value, default=sys.maxsize)
        for value in self._configuration_arg('max_comments', ) + [''] * 4]
    max_parents, max_replies, max_replies_per_thread = limits[1:4]

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    forced_continuation = False
    awaiting_header = parent is None
    if awaiting_header and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        forced_continuation = True

    page_num = 0
    while continuation:
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        progress = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, progress)
        elif awaiting_header:
            note = 'Downloading comment section API JSON'
        else:
            note = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], progress)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note,
            check_get_keys=None if forced_continuation else 'onResponseReceivedEndpoints')
        forced_continuation = False
        sections = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for section in sections:
            items = traverse_obj(
                section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if awaiting_header:
                # The first top-level response only provides the header / sort menu
                continuation = extract_header(items)
                awaiting_header = False
            else:
                for entry in extract_thread(items):
                    if not entry:
                        return
                    yield entry
                continuation = self._extract_continuation({'contents': items})
            if continuation:
                break
        page_num += 1

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

2966

2967

@staticmethod

2968

def _generate_comment_continuation(video_id):

2969

"""

2970

Generates initial comment section continuation token from given video id

2971

"""

2972

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

2973

return base64.b64encode(token.encode()).decode()

2974

2975

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Locates the item section identified as 'comment-item-section' in
    `contents` and returns an iterator over its comments, capped at the first
    value of the 'max_comments' extractor argument (no cap when unset).
    """
    def iter_comments():
        comment_section = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(), 0, limit)

2985

2986

@staticmethod

2987

def _get_checkok_params():

2988

return {'contentCheckOk': True, 'racyCheckOk': True}

2989

2990

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player request.

    `sts` is included as 'signatureTimestamp' only when it is not None.
    The check-ok flags from _get_checkok_params() are merged into the
    top level of the returned dict.
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback_context,
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context

@staticmethod
def _is_agegated(player_response):
    """Tell whether a player response indicates an age-gated video.

    True when playabilityStatus carries a 'desktopLegacyAgeGateReason', or
    when its 'status' / 'reason' contains one of the known age-gate markers.
    The comparison is case-insensitive: the status markers below are written
    in lower case, whereas status values are compared in upper case elsewhere
    in this file (e.g. 'UNPLAYABLE'), so a case-sensitive test could never
    match them.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)  # ignore unexpected non-text values
        for expected in AGE_GATE_REASONS)

3015

3016

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3019

3020

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the 'player' endpoint for `client` and return its response.

    The signature timestamp is only looked up when a `player_url` is given.
    A falsy response is normalised to None.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB',  # enable stories
        **self._generate_player_context(sts),
    }
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None

3039

3040

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the 'player_client' extractor argument into an ordered client list.

    Recognised values are the names of the non-underscore INNERTUBE_CLIENTS,
    'default' (android + web) and 'all' (every allowed client, highest
    priority first). Unsupported names are reported and skipped. When
    nothing valid was requested the defaults are used. For music URLs the
    '<client>_music' variant of each requested client is added when such a
    client exists. Duplicates are removed, keeping first occurrences.
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that later additions do not alias the defaults list
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending a list with a generator that
        # iterates the very same list would also visit the freshly added items
        music_variants = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_variants)

    return orderedSet(requested_clients)

3063

3064

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for the requested clients.

    Clients are tried in the given order; depending on the response of a
    client, further fallback clients may be queued while iterating.
    Returns (player_responses, player_url). If some client failed, the
    last error is raised when no response at all was collected and is
    reported as a warning otherwise.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    known_clients = set(clients)
    # Reversed because the queue is consumed from its end with pop()
    queue = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for candidate in client_names:
            actual_client = _split_innertube_client(candidate)[0]
            if actual_client in INNERTUBE_CLIENTS and actual_client not in known_clients:
                queue.append(candidate)
                known_clients.add(actual_client)
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while queue:
        client, base_client, variant = _split_innertube_client(queue.pop())
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                # The webpage already contained the web client's response
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; the previous one is downgraded to a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3138

3139

def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Generator over the format dicts found in `streaming_data`.

    First yields the direct ("https") formats listed under 'formats' and
    'adaptiveFormats' of every streaming data entry, decrypting the signature
    and the 'n' parameter where present. Afterwards yields the formats of
    the HLS and DASH manifests, unless skipped by the 'skip' extractor
    argument, the youtube_include_*_manifest params or the live state.
    Manifest formats whose itag was already seen for the same protocol
    are dropped.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null 'audioQuality'
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # `quality` is None when the format reports neither
                # 'quality' nor 'audioQuality'
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Work on a copy so that the list handed out by _configuration_arg is never modified
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format_id and quality to a manifest format; False if it is a duplicate"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen via another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Quality is looked up by itag first, then by height; -1 when neither is known
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Generator over storyboard ("mhtml") formats described by the player responses.

    The storyboard spec is a '|'-separated string: its first field is the
    base URL, each following field describes one storyboard level as
    '#'-separated values. Malformed levels are reported and skipped.
    Nothing is yielded when there is no usable base URL or when `duration`
    is unknown, since frame rate and fragment durations are computed from it.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() takes the first field (the base URL)
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # 'fps' and the fragment durations below divide by / from the duration;
        # with a missing (None) or zero duration they would raise
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be only partially filled
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3352

3353

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    The webpage download is skipped when 'webpage' is listed in the
    'player_skip' extractor argument. Returns the tuple
    (webpage, master_ytcfg, player_responses, player_url).
    """
    if 'webpage' in self._configuration_arg('player_skip'):
        webpage = None
    else:
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    # Fall back to the default config when none can be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3366

3367

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live state and extract all formats.

    'isLive' from the video details takes precedence; only when it is
    absent (None) is 'isLiveNow' from the live broadcast details used.
    Returns (live_broadcast_details, is_live, streaming_data, formats).
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = [*self._extract_formats(streaming_data, video_id, player_url, is_live, duration)]

    return live_broadcast_details, is_live, streaming_data, formats

3377

3378

def _real_extract(self, url):

3379

url, smuggled_data = unsmuggle_url(url, {})

3380

video_id = self._match_id(url)

3381

3382

base_url = self.http_scheme() + '//www.youtube.com/'

3383

webpage_url = base_url + 'watch?v=' + video_id

3384

3385

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3386

3387

playability_statuses = traverse_obj(

3388

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3389

3390

trailer_video_id = get_first(

3391

playability_statuses,

3392

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3393

expected_type=str)

3394

if trailer_video_id:

3395

return self.url_result(

3396

trailer_video_id, self.ie_key(), trailer_video_id)

3397

3398

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3399

if webpage else (lambda x: None))

3400

3401

video_details = traverse_obj(

3402

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3403

microformats = traverse_obj(

3404

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3405

expected_type=dict, default=[])

3406

video_title = (

3407

get_first(video_details, 'title')

3408

or self._get_text(microformats, (..., 'title'))

3409

or search_meta(['og:title', 'twitter:title', 'title']))

3410

video_description = get_first(video_details, 'shortDescription')

3411

3412

multifeed_metadata_list = get_first(

3413

player_responses,

3414

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3415

expected_type=str)

3416

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3417

if self.get_param('noplaylist'):

3418

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3423

# Unquote should take place before split on comma (,) since textual

3424

# fields may contain comma as well (see

3425

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3426

feed_data = urllib.parse.parse_qs(

3427

urllib.parse.unquote_plus(feed))

3428

3429

def feed_entry(name):

3430

return try_get(

3431

feed_data, lambda x: x[name][0], str)

3432

3433

feed_id = feed_entry('id')

3434

if not feed_id:

3435

continue

3436

feed_title = feed_entry('title')

3437

title = video_title

3438

if feed_title:

3439

title += ' (%s)' % feed_title

3440

entries.append({

3441

'_type': 'url_transparent',

3442

'ie_key': 'Youtube',

3443

'url': smuggle_url(

3444

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3445

{'force_singlefeed': True}),

3446

'title': title,

3447

})

3448

feed_ids.append(feed_id)

3449

self.to_screen(

3450

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3451

% (', '.join(feed_ids), video_id))

3452

return self.playlist_result(

3453

entries, video_id, video_title, video_description)

3454

3455

duration = int_or_none(

3456

get_first(video_details, 'lengthSeconds')

3457

or get_first(microformats, 'lengthSeconds')

3458

or parse_duration(search_meta('duration'))) or None

3459

3460

if get_first(video_details, 'isPostLiveDvr'):

3461

self.write_debug('Video is in Post-Live Manifestless mode')

3462

if (duration or 0) > 4 * 3600:

3463

self.report_warning(

3464

'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '

3465

'This is a known issue and patches are welcome')

3466

3467

live_broadcast_details, is_live, streaming_data, formats = self._list_formats(

3468

video_id, microformats, video_details, player_responses, player_url, duration)

3469

3470

if not formats:

3471

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3472

self.report_drm(video_id)

3473

pemr = get_first(

3474

playability_statuses,

3475

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3476

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3477

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3478

if subreason:

3479

if subreason == 'The uploader has not made this video available in your country.':

3480

countries = get_first(microformats, 'availableCountries')

3481

if not countries:

3482

regions_allowed = search_meta('regionsAllowed')

3483

countries = regions_allowed.split(',') if regions_allowed else None

3484

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3485

reason += f'. {subreason}'

3486

if reason:

3487

self.raise_no_formats(reason, expected=True)

3488

3489

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3490

if not keywords and webpage:

3491

keywords = [

3492

unescapeHTML(m.group('content'))

3493

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3494

for keyword in keywords:

3495

if keyword.startswith('yt:stretch='):

3496

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3497

if mobj:

3498

# NB: float is intentional for forcing float division

3499

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3504

f['stretched_ratio'] = ratio

3505

break

3506

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3507

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3508

if thumbnail_url:

3509

thumbnails.append({

3510

'url': thumbnail_url,

3511

})

3512

original_thumbnails = thumbnails.copy()

3513

3514

# The best resolution thumbnails sometimes does not appear in the webpage

3515

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3516

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3517

thumbnail_names = [

3518

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

3519

# in resolution, these are not the custom thumbnail. So de-prioritize them

3520

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3521

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3522

]

3523

n_thumbnail_names = len(thumbnail_names)

3524

thumbnails.extend({

3525

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3526

video_id=video_id, name=name, ext=ext,

3527

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3528

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3529

for thumb in thumbnails:

3530

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3531

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3532

self._remove_duplicate_formats(thumbnails)

3533

self._downloader._sort_thumbnails(original_thumbnails)

3534

3535

category = get_first(microformats, 'category') or search_meta('genre')

3536

channel_id = str_or_none(

3537

get_first(video_details, 'channelId')

3538

or get_first(microformats, 'externalChannelId')

3539

or search_meta('channelId'))

3540

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3541

3542

live_content = get_first(video_details, 'isLiveContent')

3543

is_upcoming = get_first(video_details, 'isUpcoming')

3544

if is_live is None:

3545

if is_upcoming or live_content is False:

3546

is_live = False

3547

if is_upcoming is None and (live_content or is_live):

3548

is_upcoming = False

3549

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3550

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3551

if not duration and live_end_time and live_start_time:

3552

duration = live_end_time - live_start_time

3553

3554

if is_live and self.get_param('live_from_start'):

3555

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3556

3557

formats.extend(self._extract_storyboard(player_responses, duration))

3558

3559

# Source is given priority since formats that throttle are given lower source_preference

3560

# When throttling issue is fully fixed, remove this

3561

self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3566

'formats': formats,

3567

'thumbnails': thumbnails,

3568

# The best thumbnail that we are sure exists. Prevents unnecessary

3569

# URL checking if user don't care about getting the best possible thumbnail

3570

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3571

'description': video_description,

3572

'uploader': get_first(video_details, 'author'),

3573

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3574

'uploader_url': owner_profile_url,

3575

'channel_id': channel_id,

3576

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

3577

'duration': duration,

3578

'view_count': int_or_none(

3579

get_first((video_details, microformats), (..., 'viewCount'))

3580

or search_meta('interactionCount')),

3581

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3582

'age_limit': 18 if (

3583

get_first(microformats, 'isFamilySafe') is False

3584

or search_meta('isFamilyFriendly') == 'false'

3585

or search_meta('og:restrictions:age') == '18+') else 0,

3586

'webpage_url': webpage_url,

3587

'categories': [category] if category else None,

3588

'tags': keywords,

3589

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3590

'is_live': is_live,

3591

'was_live': (False if is_live or is_upcoming or live_content is False

3592

else None if is_live is None or is_upcoming is None

3593

else live_content),

3594

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3595

'release_timestamp': live_start_time,

3596

}

3597

3598

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3599

if pctr:

3600

def get_lang_code(track):

3601

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3602

or track.get('languageCode'))

3603

3604

# Converted into dicts to remove duplicates

3605

captions = {

3606

get_lang_code(sub): sub

3607

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3608

translation_languages = {

3609

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3610

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3611

3612

def process_language(container, base_url, lang_code, sub_name, query):

3613

lang_subs = container.setdefault(lang_code, [])

3614

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

subtitles, automatic_captions = {}, {}

3625

for lang_code, caption_track in captions.items():

3626

base_url = caption_track.get('baseUrl')

3627

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3628

if not base_url:

3629

continue

3630

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3631

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3636

if not caption_track.get('isTranslatable'):

3637

continue

3638

for trans_code, trans_name in translation_languages.items():

3639

if not trans_code:

3640

continue

3641

orig_trans_code = trans_code

3642

if caption_track.get('kind') != 'asr':

3643

if 'translated_subs' in self._configuration_arg('skip'):

3644

continue

3645

trans_code += f'-{lang_code}'

3646

trans_name += format_field(lang_name, None, ' from %s')

3647

# Add an "-orig" label to the original language so that it can be distinguished.

3648

# The subs are returned without "-orig" as well for compatibility

3649

if lang_code == f'a-{orig_trans_code}':

3650

process_language(

3651

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3652

# Setting tlang=lang returns damaged subtitles.

3653

process_language(automatic_captions, base_url, trans_code, trans_name,

3654

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3655

info['automatic_captions'] = automatic_captions

3656

info['subtitles'] = subtitles

3657

3658

parsed_url = urllib.parse.urlparse(url)

3659

for component in [parsed_url.fragment, parsed_url.query]:

3660

query = urllib.parse.parse_qs(component)

3661

for k, v in query.items():

3662

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3663

d_k += '_time'

3664

if d_k not in info and k in s_ks:

3665

info[d_k] = parse_duration(query[k][0])

3666

3667

# Youtube Music Auto-generated description

3668

if video_description:

3669

mobj = re.search(

3670

r'''(?xs)

3671

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

3672

(?P<album>[^\n]+)

3673

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

3674

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

3675

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

3676

.+\nAuto-generated\ by\ YouTube\.\s*$

3677

''', video_description)

3678

if mobj:

3679

release_year = mobj.group('release_year')

3680

release_date = mobj.group('release_date')

3681

if release_date:

3682

release_date = release_date.replace('-', '')

3683

if not release_year:

3684

release_year = release_date[:4]

3685

info.update({

3686

'album': mobj.group('album'.strip()),

3687

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3688

'track': mobj.group('track').strip(),

3689

'release_date': release_date,

3690

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

3696

if not initial_data:

3697

query = {'videoId': video_id}

3698

query.update(self._get_checkok_params())

3699

initial_data = self._extract_response(

3700

item_id=video_id, ep='next', fatal=False,

3701

ytcfg=master_ytcfg, query=query,

3702

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3703

note='Downloading initial data API JSON')

3704

3705

info['comment_count'] = traverse_obj(initial_data, (

3706

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

3707

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

3708

), (

3709

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

3710

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

3711

), expected_type=int_or_none, get_all=False)

3712

3713

try: # This will error if there is no livechat

3714

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3715

except (KeyError, IndexError, TypeError):

3716

pass

3717

else:

3718

info.setdefault('subtitles', {})['live_chat'] = [{

3719

# url is needed to set cookies

3720

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

3721

'video_id': video_id,

3722

'ext': 'json',

3723

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3729

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3730

or self._extract_chapters_from_description(video_description, duration)

3731

or None)

3732

3733

contents = traverse_obj(

3734

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3735

expected_type=list, default=[])

3736

3737

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3738

if vpir:

3739

stl = vpir.get('superTitleLink')

3740

if stl:

3741

stl = self._get_text(stl)

3742

if try_get(

3743

vpir,

3744

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3745

info['location'] = stl

3746

else:

3747

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

3748

if mobj:

3749

info.update({

3750

'series': mobj.group(1),

3751

'season_number': int(mobj.group(2)),

3752

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3757

list) or []):

3758

tbr = tlb.get('toggleButtonRenderer') or {}

3759

for getter, regex in [(

3760

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3761

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3762

lambda x: x['accessibility'],

3763

lambda x: x['accessibilityData']['accessibilityData'],

3764

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3765

label = (try_get(tbr, getter, dict) or {}).get('label')

3766

if label:

3767

mobj = re.match(regex, label)

3768

if mobj:

3769

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3770

break

3771

sbr_tooltip = try_get(

3772

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3773

if sbr_tooltip:

3774

like_count, dislike_count = sbr_tooltip.split(' / ')

3775

info.update({

3776

'like_count': str_to_int(like_count),

3777

'dislike_count': str_to_int(dislike_count),

3778

})

3779

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3780

if vsir:

3781

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3782

info.update({

3783

'channel': self._get_text(vor, 'title'),

3784

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3789

list) or []

3790

multiple_songs = False

3791

for row in rows:

3792

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3793

multiple_songs = True

3794

break

3795

for row in rows:

3796

mrr = row.get('metadataRowRenderer') or {}

3797

mrr_title = mrr.get('title')

3798

if not mrr_title:

3799

continue

3800

mrr_title = self._get_text(mrr, 'title')

3801

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3802

if mrr_title == 'License':

3803

info['license'] = mrr_contents_text

3804

elif not multiple_songs:

3805

if mrr_title == 'Album':

3806

info['album'] = mrr_contents_text

3807

elif mrr_title == 'Artist':

3808

info['artist'] = mrr_contents_text

3809

elif mrr_title == 'Song':

3810

info['track'] = mrr_contents_text

3811

3812

fallbacks = {

3813

'channel': 'uploader',

3814

'channel_id': 'uploader_id',

3815

'channel_url': 'uploader_url',

3816

}

3817

3818

# The upload date for scheduled, live and past live streams / premieres in microformats

3819

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3820

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3821

upload_date = (

3822

unified_strdate(get_first(microformats, 'uploadDate'))

3823

or unified_strdate(search_meta('uploadDate')))

3824

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3825

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date

3826

info['upload_date'] = upload_date

3827

3828

for to, frm in fallbacks.items():

3829

if not info.get(to):

3830

info[to] = info.get(frm)

3831

3832

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3838

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3839

is_membersonly = None

3840

is_premium = None

3841

if initial_data and is_private is not None:

3842

is_membersonly = False

3843

is_premium = False

3844

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3845

badge_labels = set()

3846

for content in contents:

3847

if not isinstance(content, dict):

3848

continue

3849

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3850

for badge_label in badge_labels:

3851

if badge_label.lower() == 'members only':

3852

is_membersonly = True

3853

elif badge_label.lower() == 'premium':

3854

is_premium = True

3855

elif badge_label.lower() == 'unlisted':

3856

is_unlisted = True

3857

3858

info['availability'] = self._availability(

3859

is_private=is_private,

3860

needs_premium=is_premium,

3861

needs_subscription=is_membersonly,

3862

needs_auth=info['age_limit'] >= 18,

3863

is_unlisted=None if is_private is None else is_unlisted)

3864

3865

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3866

3867

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3873

3874

@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method so smuggled URL data is forwarded.

    The wrapped method is called as ``func(self, url, smuggled_data)`` with the
    data unsmuggled from the incoming URL (plus ``is_music_url`` when
    ``self.is_music_url(url)`` is true). When there is smuggled data and the
    result carries entries, every entry URL is re-smuggled with that same data.
    """
    def _with_smuggled_data(items, payload):
        for item in items:
            # TODO: Convert URL to music.youtube instead.
            # Do we need to passthrough any other smuggled_data?
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        entries = result.get('entries')
        if entries:
            # Entries are wrapped lazily; URLs are rewritten as they are consumed
            result['entries'] = _with_smuggled_data(entries, smuggled_data)
        return result

    return wrapper

def _extract_channel_id(self, webpage):

3895

channel_id = self._html_search_meta(

3896

'channelId', webpage, 'channel id', default=None)

3897

if channel_id:

3898

return channel_id

3899

channel_url = self._html_search_meta(

3900

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

3901

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

3902

'twitter:app:url:googleplay'), webpage, 'channel url')

3903

return self._search_regex(

3904

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

3905

channel_url, 'channel id')

3906

3907

@staticmethod

3908

def _extract_basic_item_renderer(item):

3909

# Modified from _extract_grid_item_renderer

3910

known_basic_renderers = (

3911

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

3912

)

3913

for key, renderer in item.items():

3914

if not isinstance(renderer, dict):

3915

continue

3916

elif key in known_basic_renderers:

3917

return renderer

3918

elif key.startswith('grid') and key.endswith('Renderer'):

3919

return renderer

3920

3921

def _grid_entries(self, grid_renderer):

3922

for item in grid_renderer['items']:

3923

if not isinstance(item, dict):

3924

continue

3925

renderer = self._extract_basic_item_renderer(item)

3926

if not isinstance(renderer, dict):

3927

continue

3928

title = self._get_text(renderer, 'title')

3929

3930

# playlist

3931

playlist_id = renderer.get('playlistId')

3932

if playlist_id:

3933

yield self.url_result(

3934

'https://www.youtube.com/playlist?list=%s' % playlist_id,

3935

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

3940

if video_id:

3941

yield self._extract_video(renderer)

3942

continue

3943

# channel

3944

channel_id = renderer.get('channelId')

3945

if channel_id:

3946

yield self.url_result(

3947

'https://www.youtube.com/channel/%s' % channel_id,

3948

ie=YoutubeTabIE.ie_key(), video_title=title)

3949

continue

3950

# generic endpoint URL support

3951

ep_url = urljoin('https://www.youtube.com/', try_get(

3952

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

3953

str))

3954

if ep_url:

3955

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

3956

if ie.suitable(ep_url):

3957

yield self.url_result(

3958

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

3959

break

3960

3961

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for a responsive list item.

    Checked in order: the item's own video ID, the watch endpoint's playlist
    (optionally together with its video ID), then the browse endpoint's
    browse ID. Returns None when none of them is present.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={item_video_id}',
                               ie=YoutubeIE.ie_key(), video_id=item_video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        watch_video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if watch_video_id:
            playlist_url = f'https://music.youtube.com/watch?v={watch_video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)

3978

3979

def _shelf_entries_from_content(self, shelf_renderer):

3980

content = shelf_renderer.get('content')

3981

if not isinstance(content, dict):

3982

return

3983

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3984

if renderer:

3985

# TODO: add support for nested playlists so each shelf is processed

3986

# as separate playlist

3987

# TODO: this includes only first N items

3988

yield from self._grid_entries(renderer)

3989

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf.

    When the shelf has an endpoint URL, a URL result for it is yielded first
    (unless *skip_channels* is set and the URL contains ``/channels?``, in
    which case nothing at all is yielded). Entries found in the shelf content
    are yielded afterwards in every other case.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels; note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

4009

4010

def _playlist_entries(self, video_list_renderer):

4011

for content in video_list_renderer['contents']:

4012

if not isinstance(content, dict):

4013

continue

4014

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

4015

if not isinstance(renderer, dict):

4016

continue

4017

video_id = renderer.get('videoId')

4018

if not video_id:

4019

continue

4020

yield self._extract_video(renderer)

4021

4022

def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in a rich item's ``content.videoRenderer``, if it has a video ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4029

4030

def _video_entry(self, video_renderer):

4031

video_id = video_renderer.get('videoId')

4032

if video_id:

4033

return self._extract_video(video_renderer)

4034

4035

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab URL result for a hashtag tile, or None when the tile has no tap URL."""
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_path)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4041

4042

def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every inline link in the post text that ``YoutubeIE`` can handle and
    that does not point to the already-yielded attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        link_video_id = YoutubeIE._match_id(link_url)
        # Avoid emitting the attached video a second time
        if link_video_id != video_id:
            yield self.url_result(link_url, ie=YoutubeIE.ie_key(), video_id=link_video_id)

4077

4078

def _post_thread_continuation_entries(self, post_thread_continuation):

4079

contents = post_thread_continuation.get('contents')

4080

if not isinstance(contents, list):

4081

return

4082

for content in contents:

4083

renderer = content.get('backstagePostThreadRenderer')

4084

if isinstance(renderer, dict):

4085

yield from self._post_thread_entries(renderer)

4086

continue

4087

renderer = content.get('videoRenderer')

4088

if isinstance(renderer, dict):

4089

yield self._video_entry(renderer)

4090

4091

r''' # unused

4092

def _rich_grid_entries(self, contents):

4093

for content in contents:

4094

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4095

if video_renderer:

4096

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield entries found under ``parent_renderer['contents']``.

    *continuation_list* is an out-parameter: it is reset to ``[None]`` on entry
    and its single slot is filled in-place with the continuation that applies
    once iteration has finished, so the caller must read it only after
    exhausting this generator.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            # Not a section: the only other supported shape is a rich item
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            for key, renderer in section_item.items():
                if key in handlers:
                    # Only the first recognised renderer of an item is used;
                    # falsy entries produced by a handler are dropped
                    yield from filter(None, handlers[key](renderer))
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4149

4150

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of a tab, following continuations page by page.

    The tab's own content is extracted first. While a continuation is known,
    the next page is requested and its entries are taken either from
    ``continuationContents`` or, failing that, from the first
    ``appendContinuationItemsAction`` of the response. Paging stops when there
    is no continuation, no response, or no recognised renderer in a page.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Resolves continuation_list at call time, so it always fills the list
        # that is current when the generator is created
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict)
        or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for renderers found directly under 'continuationContents'
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for appended continuation items: (entry generator, key the items are wrapped under)
    item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key in continuation_handlers:
                continuation_renderer = value
                continuation_list = [None]
                yield from continuation_handlers[key](continuation_renderer)
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The first item's renderer key decides how the whole item list is handled
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key in first_item:
            if key in item_handlers:
                entries_of, wrapper_key = item_handlers[key]
                video_items_renderer = {wrapper_key: continuation_items}
                continuation_list = [None]
                yield from entries_of(video_items_renderer)
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
        if not video_items_renderer:
            break

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4227

for tab in tabs:

4228

renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}

4229

if renderer.get('selected') is True:

return renderer

else:

if fatal:

raise ExtractorError('Unable to find selected tab')

4234

4235

def _extract_uploader(self, data):
    """Return uploader name, ID and URL taken from the playlist's secondary sidebar.

    The values come from the first title run of the sidebar's video owner.
    Keys whose value is None are left out; an empty dict is returned when no
    owner run is found.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # Reduces text of the form "by X and N others" to X; any other text is kept as-is
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {name: value for name, value in fields.items() if value is not None}

4250

4251

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for a tab-based page (channel tab, playlist, hashtag...).

    Metadata is taken from the channel metadata renderer when present and from
    the playlist metadata renderer otherwise; entries come from the selected
    tab. Uploader fields fall back to the sidebar owner when the page has no
    channel ID.
    """
    title = description = channel_url = channel_name = channel_id = None

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _append_uncropped(thumbnails, thumbnail_id, preference):
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    # Added after the loop above so the uncropped banner keeps its own preference
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    tab_title_suffix = format_field(selected_tab, 'title', ' - %s')
    expanded_text_suffix = format_field(selected_tab, 'expandedText', ' - %s')
    title = title + tab_title_suffix + expanded_text_suffix

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # Channel fields mirror whatever uploader fields were settled on above
    for channel_key, uploader_key in (
            ('channel', 'uploader'),
            ('channel_id', 'uploader_id'),
            ('channel_url', 'uploader_url')):
        metadata[channel_key] = metadata[uploader_key]

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4342

4343

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline (watch-page) playlist, page by page.

    Each page is the playlist panel of a ``next`` response requested for the
    last video seen so far. Videos up to and including the previously yielded
    last video are skipped on each new page. Iteration ends when a page has no
    videos, contributes nothing new, or the response carries no playlist.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item may not be a playlistPanelVideoRenderer with a watch
        # endpoint; fall back to an empty dict so the defaults below apply
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No playlist panel in the response: stop instead of failing on the next page
            return

4373

4374

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist embedded in a response.

    Returns a url_result pointing at the playlist's own URL (handled by YoutubeTabIE)
    when such a URL exists, differs from `url` and the playlist ID is not a known
    unviewable one; otherwise returns a playlist_result over the inline entries.

    @param item_id   fallback playlist ID when the playlist has no 'playlistId'
    @param url       URL being extracted; base for resolving the playlist URL
    @param data      response; source of the fallback title
    @param playlist  playlist dict
    @param ytcfg     ytcfg passed on to the inline playlist extraction
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Everything except mix playlists is delegated to the regular tab-based playlist URL
    web_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, web_path)

    # For some unviewable playlists YouTube still links to the (broken) playlist page,
    # e.g. MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not delegate:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)

4396

4397

def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its primary sidebar renderer.

    Nothing is assumed to be public unless a 'public' label is actually present.
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # When authenticated, personal playlists expose a visibility dropdown rather than a badge;
    # the label of the first selected entry that has one is treated like a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            badge_labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for label in badge_labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)

4428

4429

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value of type `expected_type` found among
    the playlist sidebar items of `data`, or None when there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    # filter(None, ...) drops missing/empty renderers, exactly like a truthiness check
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with unavailable videos included.

    Returns None when `data` has no primary sidebar renderer. Otherwise the browse
    endpoint of the 'show unavailable videos' menu item is used if that item exists,
    and fixed default parameters are sent if it does not.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_renderer = entry.get('menuNavigationItemRenderer')
        label = try_get(nav_renderer, lambda x: x['text']['simpleText'], str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    api_headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    # Fall back to the default params/browse ID when the menu item did not provide them
    api_query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=api_headers, query=api_query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4473

4474

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps

4477

4478

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and its initial data, retrying up to the 'extractor_retries' param.

    A retry happens on network errors other than HTTP 403/429 and when the initial
    data has none of the expected top-level keys. With fatal=False, failures are
    reported as warnings instead of being raised.

    @returns (webpage, data) as obtained by the last attempt that set them
    """
    retries = self.get_param('extractor_retries', 3)
    webpage = data = last_error = None
    attempt = -1
    while attempt < retries:
        attempt += 1
        # YouTube sometimes serves a webpage whose ytInitialData is incomplete
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            webpage = self._download_webpage(
                url, item_id, note='Downloading webpage%s' % (retry_suffix,))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Network errors are retried, except for HTTP 403 and 429
            retryable = isinstance(cause, network_exceptions) and not (
                isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429))
            if retryable:
                last_error = error_to_compat_str(cause or e.msg)
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt < retries:
            continue
        if fatal:
            raise ExtractorError(last_error)
        self.report_warning(last_error)
        break

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing unless ytcfg is empty and the session is authenticated. In that case
    an error is raised when fatal is set and 'authcheck' is not among the skipped steps;
    otherwise a one-time warning is reported.
    """
    if ytcfg or not self.is_authenticated:
        return
    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if 'authcheck' not in skipped_steps and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

4535

4536

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the initial data and ytcfg for a tab/playlist URL.

    The webpage is tried first unless it is skipped; when that leaves no data,
    the data is requested through the API instead.

    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Detect a redirect to the home page that was not explicitly requested
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4555

4556

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API endpoint and fetch the
    response of the browse/watch endpoint it resolves to.

    If neither endpoint is present, an ExtractorError is raised when fatal is set;
    otherwise a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query), in order of preference
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

4573

4574

# Default 'params' value sent with search requests when the caller passes none
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, following continuations page by page.

    @param query           search query string
    @param params          'params' value for the request; defaults to _SEARCH_PARAMS
    @param default_client  client used for the ytcfg download, the headers and the requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Paths under which the result items may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-item list that _extract_entries receives so the next continuation can be read back
    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            break

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4611

IE_DESC = 'YouTube Tabs'

4612

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4621

(?P<not_channel>

4622

feed/|hashtag/|

4623

(?:playlist|watch)\?.*?\blist=

4624

)|

4625

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4630

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4631

}

4632

IE_NAME = 'youtube:tab'

4633

4634

_TESTS = [{

4635

'note': 'playlists, multipage',

4636

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4637

'playlist_mincount': 94,

4638

'info_dict': {

4639

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4640

'title': 'Igor Kleiner - Playlists',

4641

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4642

'uploader': 'Igor Kleiner',

4643

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4644

'channel': 'Igor Kleiner',

4645

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4646

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4647

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4648

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4649

'channel_follower_count': int

4650

},

4651

}, {

4652

'note': 'playlists, multipage, different order',

4653

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4654

'playlist_mincount': 94,

4655

'info_dict': {

4656

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4657

'title': 'Igor Kleiner - Playlists',

4658

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4659

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4660

'uploader': 'Igor Kleiner',

4661

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4662

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4663

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4664

'channel': 'Igor Kleiner',

4665

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4666

'channel_follower_count': int

4667

},

4668

}, {

4669

'note': 'playlists, series',

4670

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4671

'playlist_mincount': 5,

4672

'info_dict': {

4673

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4674

'title': '3Blue1Brown - Playlists',

4675

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4676

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4677

'uploader': '3Blue1Brown',

4678

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4679

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4680

'channel': '3Blue1Brown',

4681

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4682

'tags': ['Mathematics'],

4683

'channel_follower_count': int

4684

},

4685

}, {

4686

'note': 'playlists, singlepage',

4687

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4688

'playlist_mincount': 4,

4689

'info_dict': {

4690

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4691

'title': 'ThirstForScience - Playlists',

4692

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4693

'uploader': 'ThirstForScience',

4694

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4695

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4696

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4697

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4698

'tags': 'count:13',

4699

'channel': 'ThirstForScience',

4700

'channel_follower_count': int

4701

}

4702

}, {

4703

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4704

'only_matching': True,

4705

}, {

4706

'note': 'basic, single video playlist',

4707

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4708

'info_dict': {

4709

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4710

'uploader': 'Sergey M.',

4711

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4712

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4717

'channel': 'Sergey M.',

4718

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4719

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4720

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4725

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4726

'info_dict': {

4727

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4728

'uploader': 'Sergey M.',

4729

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4730

'title': 'youtube-dl empty playlist',

4731

'tags': [],

4732

'channel': 'Sergey M.',

4733

'description': '',

4734

'modified_date': '20160902',

4735

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4736

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4737

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4743

'info_dict': {

4744

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4745

'title': 'lex will - Home',

4746

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4747

'uploader': 'lex will',

4748

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4749

'channel': 'lex will',

4750

'tags': ['bible', 'history', 'prophesy'],

4751

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4752

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4753

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4754

'channel_follower_count': int

4755

},

4756

'playlist_mincount': 2,

4757

}, {

4758

'note': 'Videos tab',

4759

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4760

'info_dict': {

4761

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4762

'title': 'lex will - Videos',

4763

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4764

'uploader': 'lex will',

4765

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4766

'tags': ['bible', 'history', 'prophesy'],

4767

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4768

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4769

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4770

'channel': 'lex will',

4771

'channel_follower_count': int

4772

},

4773

'playlist_mincount': 975,

4774

}, {

4775

'note': 'Videos tab, sorted by popular',

4776

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4777

'info_dict': {

4778

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4779

'title': 'lex will - Videos',

4780

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4781

'uploader': 'lex will',

4782

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4783

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4784

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4785

'channel': 'lex will',

4786

'tags': ['bible', 'history', 'prophesy'],

4787

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4788

'channel_follower_count': int

4789

},

4790

'playlist_mincount': 199,

4791

}, {

4792

'note': 'Playlists tab',

4793

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4794

'info_dict': {

4795

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4796

'title': 'lex will - Playlists',

4797

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4798

'uploader': 'lex will',

4799

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4800

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4801

'channel': 'lex will',

4802

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4803

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4804

'tags': ['bible', 'history', 'prophesy'],

4805

'channel_follower_count': int

4806

},

4807

'playlist_mincount': 17,

4808

}, {

4809

'note': 'Community tab',

4810

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4811

'info_dict': {

4812

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4813

'title': 'lex will - Community',

4814

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4815

'uploader': 'lex will',

4816

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4817

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4818

'channel': 'lex will',

4819

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4820

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4821

'tags': ['bible', 'history', 'prophesy'],

4822

'channel_follower_count': int

4823

},

4824

'playlist_mincount': 18,

4825

}, {

4826

'note': 'Channels tab',

4827

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4828

'info_dict': {

4829

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4830

'title': 'lex will - Channels',

4831

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4832

'uploader': 'lex will',

4833

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4834

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4835

'channel': 'lex will',

4836

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4837

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4838

'tags': ['bible', 'history', 'prophesy'],

4839

'channel_follower_count': int

4840

},

4841

'playlist_mincount': 12,

4842

}, {

4843

'note': 'Search tab',

4844

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4845

'playlist_mincount': 40,

4846

'info_dict': {

4847

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4848

'title': '3Blue1Brown - Search - linear algebra',

4849

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4850

'uploader': '3Blue1Brown',

4851

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4852

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4853

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4854

'tags': ['Mathematics'],

4855

'channel': '3Blue1Brown',

4856

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4857

'channel_follower_count': int

4858

},

4859

}, {

4860

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4861

'only_matching': True,

4862

}, {

4863

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4864

'only_matching': True,

4865

}, {

4866

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4867

'only_matching': True,

4868

}, {

4869

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4870

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4871

'info_dict': {

4872

'title': '29C3: Not my department',

4873

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4874

'uploader': 'Christiaan008',

4875

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4876

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4877

'tags': [],

4878

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4879

'view_count': int,

4880

'modified_date': '20150605',

4881

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4882

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4883

'channel': 'Christiaan008',

4884

},

4885

'playlist_count': 96,

4886

}, {

4887

'note': 'Large playlist',

4888

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4889

'info_dict': {

4890

'title': 'Uploads from Cauchemar',

4891

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4892

'uploader': 'Cauchemar',

4893

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4894

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4895

'tags': [],

4896

'modified_date': r're:\d{8}',

4897

'channel': 'Cauchemar',

4898

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4899

'view_count': int,

4900

'description': '',

4901

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4902

},

4903

'playlist_mincount': 1123,

4904

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4905

}, {

4906

'note': 'even larger playlist, 8832 videos',

4907

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4908

'only_matching': True,

4909

}, {

4910

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4911

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4912

'info_dict': {

4913

'title': 'Uploads from Interstellar Movie',

4914

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4915

'uploader': 'Interstellar Movie',

4916

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4917

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4918

'tags': [],

4919

'view_count': int,

4920

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4921

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4922

'channel': 'Interstellar Movie',

4923

'description': '',

4924

'modified_date': r're:\d{8}',

4925

},

4926

'playlist_mincount': 21,

4927

}, {

4928

'note': 'Playlist with "show unavailable videos" button',

4929

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4930

'info_dict': {

4931

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4932

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4933

'uploader': 'Phim Siêu Nhân Nhật Bản',

4934

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4935

'view_count': int,

4936

'channel': 'Phim Siêu Nhân Nhật Bản',

4937

'tags': [],

4938

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4939

'description': '',

4940

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4941

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4942

'modified_date': r're:\d{8}',

4943

},

4944

'playlist_mincount': 200,

4945

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4946

}, {

4947

'note': 'Playlist with unavailable videos in page 7',

4948

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4949

'info_dict': {

4950

'title': 'Uploads from BlankTV',

4951

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4952

'uploader': 'BlankTV',

4953

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4954

'channel': 'BlankTV',

4955

'channel_url': 'https://www.youtube.com/c/blanktv',

4956

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4957

'view_count': int,

4958

'tags': [],

4959

'uploader_url': 'https://www.youtube.com/c/blanktv',

4960

'modified_date': r're:\d{8}',

4961

'description': '',

4962

},

4963

'playlist_mincount': 1000,

4964

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4965

}, {

4966

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4967

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4968

'info_dict': {

4969

'title': 'Data Analysis with Dr Mike Pound',

4970

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4971

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4972

'uploader': 'Computerphile',

4973

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4974

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4975

'tags': [],

4976

'view_count': int,

4977

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4978

'channel_url': 'https://www.youtube.com/user/Computerphile',

4979

'channel': 'Computerphile',

4980

},

4981

'playlist_mincount': 11,

4982

}, {

4983

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4984

'only_matching': True,

4985

}, {

4986

'note': 'Playlist URL that does not actually serve a playlist',

4987

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4992

'uploader': 'STREEM',

4993

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4994

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4995

'upload_date': '20150526',

4996

'license': 'Standard YouTube License',

4997

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4998

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5005

},

5006

'skip': 'This video is not available.',

5007

'add_ie': [YoutubeIE.ie_key()],

5008

}, {

5009

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5010

'only_matching': True,

5011

}, {

5012

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5013

'only_matching': True,

5014

}, {

5015

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5016

'info_dict': {

5017

'id': 'Wq15eF5vCbI', # This will keep changing

5018

'ext': 'mp4',

5019

'title': str,

5020

'uploader': 'Sky News',

5021

'uploader_id': 'skynews',

5022

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5023

'upload_date': r're:\d{8}',

5024

'description': str,

5025

'categories': ['News & Politics'],

5026

'tags': list,

5027

'like_count': int,

5028

'release_timestamp': 1642502819,

5029

'channel': 'Sky News',

5030

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5031

'age_limit': 0,

5032

'view_count': int,

5033

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

5034

'playable_in_embed': True,

5035

'release_date': '20220118',

5036

'availability': 'public',

5037

'live_status': 'is_live',

5038

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5039

'channel_follower_count': int

5040

},

5041

'params': {

5042

'skip_download': True,

5043

},

5044

'expected_warnings': ['Ignoring subtitle tracks found in '],

5045

}, {

5046

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5051

'uploader': 'The Young Turks',

5052

'uploader_id': 'TheYoungTurks',

5053

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5054

'upload_date': '20150715',

5055

'license': 'Standard YouTube License',

5056

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5057

'categories': ['News & Politics'],

5058

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5063

},

5064

'only_matching': True,

5065

}, {

5066

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5067

'only_matching': True,

5068

}, {

5069

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5070

'only_matching': True,

5071

}, {

5072

'note': 'A channel that is not live. Should raise error',

5073

'url': 'https://www.youtube.com/user/numberphile/live',

5074

'only_matching': True,

5075

}, {

5076

'url': 'https://www.youtube.com/feed/trending',

5077

'only_matching': True,

5078

}, {

5079

'url': 'https://www.youtube.com/feed/library',

5080

'only_matching': True,

5081

}, {

5082

'url': 'https://www.youtube.com/feed/history',

5083

'only_matching': True,

5084

}, {

5085

'url': 'https://www.youtube.com/feed/subscriptions',

5086

'only_matching': True,

5087

}, {

5088

'url': 'https://www.youtube.com/feed/watch_later',

5089

'only_matching': True,

5090

}, {

5091

'note': 'Recommended - redirects to home page.',

5092

'url': 'https://www.youtube.com/feed/recommended',

5093

'only_matching': True,

5094

}, {

5095

'note': 'inline playlist with not always working continuations',

5096

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5097

'only_matching': True,

5098

}, {

5099

'url': 'https://www.youtube.com/course',

5100

'only_matching': True,

5101

}, {

5102

'url': 'https://www.youtube.com/zsecurity',

5103

'only_matching': True,

5104

}, {

5105

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5106

'only_matching': True,

5107

}, {

5108

'url': 'https://www.youtube.com/TheYoungTurks/live',

5109

'only_matching': True,

5110

}, {

5111

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5118

}, {

5119

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5120

'only_matching': True,

5121

}, {

5122

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5123

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5124

'only_matching': True

5125

}, {

5126

'note': '/browse/ should redirect to /channel/',

5127

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5128

'only_matching': True

5129

}, {

5130

'note': 'VLPL, should redirect to playlist?list=PL...',

5131

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5132

'info_dict': {

5133

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5134

'uploader': 'NoCopyrightSounds',

5135

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5136

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5137

'title': 'NCS : All Releases 💿',

5138

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5139

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5140

'modified_date': r're:\d{8}',

5141

'view_count': int,

5142

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5143

'tags': [],

5144

'channel': 'NoCopyrightSounds',

5145

},

5146

'playlist_mincount': 166,

5147

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5148

}, {

5149

'note': 'Topic, should redirect to playlist?list=UU...',

5150

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5151

'info_dict': {

5152

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5153

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5154

'title': 'Uploads from Royalty Free Music - Topic',

5155

'uploader': 'Royalty Free Music - Topic',

5156

'tags': [],

5157

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5158

'channel': 'Royalty Free Music - Topic',

5159

'view_count': int,

5160

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5161

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5162

'modified_date': r're:\d{8}',

5163

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5164

'description': '',

5165

},

5166

'expected_warnings': [

5167

'The URL does not have a videos tab',

5168

r'[Uu]navailable videos (are|will be) hidden',

5169

],

5170

'playlist_mincount': 101,

5171

}, {

5172

'note': 'Topic without a UU playlist',

5173

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5174

'info_dict': {

5175

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5176

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5177

'tags': [],

5178

},

5179

'expected_warnings': [

5180

'the playlist redirect gave error',

5181

],

5182

'playlist_mincount': 9,

5183

}, {

5184

'note': 'Youtube music Album',

5185

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5186

'info_dict': {

5187

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5188

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5193

'modified_date': r're:\d{8}',

5194

},

5195

'playlist_count': 50,

5196

}, {

5197

'note': 'unlisted single video playlist',

5198

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5199

'info_dict': {

5200

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5201

'uploader': 'colethedj',

5202

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5203

'title': 'yt-dlp unlisted playlist test',

5204

'availability': 'unlisted',

5205

'tags': [],

5206

'modified_date': '20220418',

5207

'channel': 'colethedj',

5208

'view_count': int,

5209

'description': '',

5210

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5211

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5212

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5217

'url': 'https://www.youtube.com/feed/recommended',

5218

'info_dict': {

5219

'id': 'recommended',

5220

'title': 'recommended',

5221

'tags': [],

5222

},

5223

'playlist_mincount': 50,

5224

'params': {

5225

'skip_download': True,

5226

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5227

},

5228

}, {

5229

'note': 'API Fallback: /videos tab, sorted by oldest first',

5230

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5231

'info_dict': {

5232

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5233

'title': 'Cody\'sLab - Videos',

5234

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5235

'uploader': 'Cody\'sLab',

5236

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5237

'channel': 'Cody\'sLab',

5238

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5239

'tags': [],

5240

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5241

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5242

'channel_follower_count': int

5243

},

5244

'playlist_mincount': 650,

5245

'params': {

5246

'skip_download': True,

5247

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5248

},

5249

}, {

5250

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5251

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5252

'info_dict': {

5253

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5254

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5255

'title': 'Uploads from Royalty Free Music - Topic',

5256

'uploader': 'Royalty Free Music - Topic',

5257

'modified_date': r're:\d{8}',

5258

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5259

'description': '',

5260

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5261

'tags': [],

5262

'channel': 'Royalty Free Music - Topic',

5263

'view_count': int,

5264

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5265

},

5266

'expected_warnings': [

5267

'does not have a videos tab',

5268

r'[Uu]navailable videos (are|will be) hidden',

5269

],

5270

'playlist_mincount': 101,

5271

'params': {

5272

'skip_download': True,

5273

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5274

},

5275

}, {

5276

'note': 'non-standard redirect to regional channel',

5277

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5278

'only_matching': True

5279

}, {

5280

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5281

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5282

'info_dict': {

5283

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5284

'modified_date': '20220407',

5285

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5286

'tags': [],

5287

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5288

'uploader': 'pukkandan',

5289

'availability': 'unlisted',

5290

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5291

'channel': 'pukkandan',

5292

'description': 'Test for collaborative playlist',

5293

'title': 'yt-dlp test - collaborative playlist',

5294

'view_count': int,

5295

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5296

},

5297

'playlist_mincount': 2

}]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    Any URL that ``YoutubeIE.suitable`` accepts is rejected here; every
    other URL is decided by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5303

5304

# Splits a URL into named groups: ``pre`` (the part matched by _VALID_URL), an
# optional ``tab`` path segment (``/\w+``, only attempted when the ``not_channel``
# group did not participate in the match) and ``post`` (everything that follows).
# NOTE(review): ``not_channel`` is not defined in this pattern itself, so it is
# presumably a named group of _VALID_URL - confirm against its definition.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5305

5306

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Turn a tab, channel, playlist or watch URL into an extraction result.

    The URL is first normalised (host forced to www.youtube.com, tab name
    lower-cased, YouTube Music ids mapped to their regular counterparts, bare
    channel URLs pointed at ``/videos``). The page data is then fetched and
    dispatched, in order, to: a regional redirect, the tab renderer, the
    watch-page playlist renderer, or a plain single-video result.

    Raises ExtractorError when the page cannot be recognised or a requested
    tab does not exist, and UserNotLive when a ``/live`` tab was requested
    but a tab page was returned instead of a video.
    """
    item_id = self._match_id(url)
    parsed_url = urllib.parse.urlparse(url)
    url = urllib.parse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    follow_redirects = 'no-youtube-channel-redirect' not in compat_opts

    def split_url(candidate):
        # Named groups of _URL_RE; groups that did not match become ''
        groups = self._URL_RE.match(candidate).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    mobj = split_url(url)
    redirect_warning = None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post = mobj['pre'], mobj['tab'].lower(), mobj['post']
    is_channel = not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':  # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab, post, is_channel = '', '', False
        elif id_prefix == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user asked for before a default tab is substituted
    original_tab_name = tab
    if is_channel and not tab and follow_redirects:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = split_url(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data,
        ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        get_all=False)
    if redirect_url and follow_redirects:
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        if follow_redirects:
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The user explicitly asked for this tab, so do not silently substitute another
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # ``data`` may have been replaced above, so the tabs are looked up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return self.url_result(
        f'https://www.youtube.com/watch?v={video_id}',
        ie=YoutubeIE.ie_key(), video_id=video_id)

5434

5435

5436

class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and ``...?list=<id>`` URLs and hand them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    # Verbose-mode pattern: an optional host/path prefix ending in "list=",
    # followed by the playlist id itself
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by YoutubeTabIE or carrying a ``v`` parameter; otherwise defer to the parent check."""
        if YoutubeTabIE.suitable(url):
            return False
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a ``/playlist`` URL and delegate to YoutubeTabIE.

        The original query string is carried over; when there is none, only
        ``list=<playlist id>`` is used. For URLs that
        ``YoutubeBaseInfoExtractor.is_music_url`` flags, that fact is smuggled
        along with the rewritten URL.
        """
        playlist_id = self._match_id(url)
        # Must be evaluated on the input URL, before it is rewritten below
        came_from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if came_from_music:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5545

5546

5547

class YoutubeYtBeIE(InfoExtractor):
    """Expand ``youtu.be/<video id>?list=<playlist id>`` links into full watch URLs."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent ``/watch?v=...&list=...`` URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5596

5597

5598

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` embeds onto the channel's ``/live`` URL."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the ``channel`` value captured from the URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5611

5612

5613

class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to the user's ``/videos`` page."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5627

5628

5629

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` keyword family to the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=LL`` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

5646

5647

5648

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Expose the notification menu as a playlist of video and community-post URLs."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in ``response``.

        ``continuation_list`` is a one-element list used as an output slot:
        it is reset to ``None`` and then set to the last
        ``continuationItemRenderer`` seen among the items, if any.
        """
        menu_items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for menu_item in menu_items:
            entry = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert one notification renderer into a ``url`` result dict.

        A notification with a watch endpoint points at the video. Otherwise
        it must carry both a browse id and a ``/post/`` id, in which case it
        points at the channel's community post. Returns ``None`` when
        neither form applies.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The sent-time text is only converted to a date when explicitly requested
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            upload_date = strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from successive notification-menu pages until no continuation is left."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response (None on the first page)
            headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a playlist whose id and title are both 'notifications'."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)

5737

5738

5739

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle ``/results`` and ``/search`` URLs, honouring their ``sp`` parameter."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named by ``search_query``/``q`` and return it as a playlist.

        The first ``sp`` value of the URL (or ``None``) is passed through as
        the search parameters; the query doubles as playlist id and title.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

5808

5809

5810

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs, optionally limited to one result section."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment) -> value of the ``sp`` parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run the music search and return it as a playlist titled ``<query>[ - <section>]``.

        An explicit ``sp`` parameter wins and is labelled with the matching
        ``_SECTIONS`` name, or with the raw value when none matches.
        Without ``sp``, the text after the first ``#`` selects a section;
        an unknown section is ignored.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            section = params
            for section_name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = section_name
                    break
        else:
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

5861

5862

5863

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    Each subclass is expected to override _FEED_NAME; it determines both the
    extractor name (``youtube:<feed>``) and the ``/feed/<feed>`` URL that is
    handed to YoutubeTabIE.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        """Run the login-requirement check implemented on YoutubeBaseInfoExtractor."""
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        """Extractor name derived from the feed name."""
        return f'youtube:{cls._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with this feed's URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5881

5882

5883

class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` keyword to the ``WL`` playlist."""

    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5895

5896

5897

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``; matches the bare site URL and the ``:ytrec`` keyword."""

    _FEED_NAME = 'recommended'
    # Unlike the base class default, this feed is declared as not requiring login
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions``; matches the ``:ytsubs`` keyword family."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed, selected with ":ythis" or ":ythistory"."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]

class YoutubeStoriesIE(InfoExtractor):
    """Resolve a "ytstories:UC..." pseudo-URL to a playlist URL handled by YoutubeTabIE."""
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {
            'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the "RLTD"-prefixed playlist ID from the matched id group and delegate to YoutubeTabIE."""
        # The id group excludes the leading "UC" (see _VALID_URL).
        matched_id = self._match_id(url)
        playlist_id = f'RLTD{matched_id}'
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        return self.url_result(playlist_url, ie=YoutubeTabIE, video_id=playlist_id)

5951

5952

5953

class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catch YouTube watch/attribution URLs that carry no video ID.

    Such URLs typically result from an unquoted "&" being interpreted by the
    shell; extraction always fails with an explanatory, expected error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose regex: a youtube(-nocookie).com host followed by either
    # "watch?" with at most one non-ID parameter, or an attribution link
    # with only its "a" parameter, anchored at end of string.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """
        Always raise: a URL matched by this extractor has no video ID.

        Raises:
            ExtractorError: with expected=True, advising the user to quote
                the URL or pass the bare video ID.
        """
        # The example commands name yt-dlp (this project's executable)
        # rather than youtube-dl, so the suggestion can be copied as-is.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """
    Extractor for youtube.com/clip/<id> URLs.

    Finds the underlying video ID and the clip's start/end offsets in the
    page data, then returns a url_transparent result that defers to YoutubeIE
    for the video itself.
    """
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
        },
    }]

    def _real_extract(self, url):
        """
        Resolve a clip URL to its base video plus section boundaries.

        Returns:
            A url_transparent info dict whose 'section_start' and
            'section_end' are the clip offsets in seconds (the page's
            millisecond values divided by 1000).

        Raises:
            ExtractorError: if the video ID or the clip start/end times
                cannot be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # traverse_obj may come back empty, and the time fields may be absent
        # or non-numeric; report that as an extraction failure instead of
        # letting a bare TypeError/KeyError/ValueError escape.
        try:
            section_start = int(clip_data['startTimeMs']) / 1000
            section_end = int(clip_data['endTimeMs']) / 1000
        except (KeyError, TypeError, ValueError) as err:
            raise ExtractorError('Unable to find clip start/end times') from err

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': section_start,
            'section_end': section_end,
        }

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose "v" value is only 1-10 characters long and report them as truncated."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID and the offending URL."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)