jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import hashlib
	6	import itertools
	7	import json
	8	import math
	9	import os.path
	10	import random
	11	import re
	12	import sys
	13	import threading
	14	import time
	15	import traceback
	16
	17	from .common import InfoExtractor, SearchInfoExtractor
	18	from ..compat import functools
	19	from ..compat import (
	20	compat_chr,
	21	compat_HTTPError,
	22	compat_parse_qs,
	23	compat_str,
	24	compat_urllib_parse_unquote_plus,
	25	compat_urllib_parse_urlencode,
	26	compat_urllib_parse_urlparse,
	27	compat_urlparse,
	28	)
	29	from ..jsinterp import JSInterpreter
	30	from ..utils import (
	31	NO_DEFAULT,
	32	ExtractorError,
	33	bug_reports_message,
	34	classproperty,
	35	clean_html,
	36	datetime_from_str,
	37	dict_get,
	38	error_to_compat_str,
	39	float_or_none,
	40	format_field,
	41	get_first,
	42	int_or_none,
	43	is_html,
	44	join_nonempty,
	45	js_to_json,
	46	mimetype2ext,
	47	network_exceptions,
	48	orderedSet,
	49	parse_codecs,
	50	parse_count,
	51	parse_duration,
	52	parse_iso8601,
	53	parse_qs,
	54	qualities,
	55	remove_end,
	56	remove_start,
	57	smuggle_url,
	58	str_or_none,
	59	str_to_int,
	60	strftime_or_none,
	61	traverse_obj,
	62	try_get,
	63	unescapeHTML,
	64	unified_strdate,
	65	unified_timestamp,
	66	unsmuggle_url,
	67	update_url_query,
	68	url_or_none,
	69	urljoin,
	70	variadic,
	71	)
	72
	73	# any clients starting with _ cannot be explicity requested by the user
	74	INNERTUBE_CLIENTS = {
	75	'web': {
	76	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	77	'INNERTUBE_CONTEXT': {
	78	'client': {
	79	'clientName': 'WEB',
	80	'clientVersion': '2.20211221.00.00',
	81	}
	82	},
	83	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	84	},
	85	'web_embedded': {
	86	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	87	'INNERTUBE_CONTEXT': {
	88	'client': {
	89	'clientName': 'WEB_EMBEDDED_PLAYER',
	90	'clientVersion': '1.20211215.00.01',
	91	},
	92	},
	93	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	94	},
	95	'web_music': {
	96	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	97	'INNERTUBE_HOST': 'music.youtube.com',
	98	'INNERTUBE_CONTEXT': {
	99	'client': {
	100	'clientName': 'WEB_REMIX',
	101	'clientVersion': '1.20211213.00.00',
	102	}
	103	},
	104	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	105	},
	106	'web_creator': {
	107	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	108	'INNERTUBE_CONTEXT': {
	109	'client': {
	110	'clientName': 'WEB_CREATOR',
	111	'clientVersion': '1.20211220.02.00',
	112	}
	113	},
	114	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	115	},
	116	'android': {
	117	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	118	'INNERTUBE_CONTEXT': {
	119	'client': {
	120	'clientName': 'ANDROID',
	121	'clientVersion': '16.49',
	122	}
	123	},
	124	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	125	'REQUIRE_JS_PLAYER': False
	126	},
	127	'android_embedded': {
	128	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	129	'INNERTUBE_CONTEXT': {
	130	'client': {
	131	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	132	'clientVersion': '16.49',
	133	},
	134	},
	135	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	136	'REQUIRE_JS_PLAYER': False
	137	},
	138	'android_music': {
	139	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	140	'INNERTUBE_CONTEXT': {
	141	'client': {
	142	'clientName': 'ANDROID_MUSIC',
	143	'clientVersion': '4.57',
	144	}
	145	},
	146	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	147	'REQUIRE_JS_PLAYER': False
	148	},
	149	'android_creator': {
	150	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	151	'INNERTUBE_CONTEXT': {
	152	'client': {
	153	'clientName': 'ANDROID_CREATOR',
	154	'clientVersion': '21.47',
	155	},
	156	},
	157	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	158	'REQUIRE_JS_PLAYER': False
	159	},
	160	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	161	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	162	'ios': {
	163	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	164	'INNERTUBE_CONTEXT': {
	165	'client': {
	166	'clientName': 'IOS',
	167	'clientVersion': '16.46',
	168	'deviceModel': 'iPhone14,3',
	169	}
	170	},
	171	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	172	'REQUIRE_JS_PLAYER': False
	173	},
	174	'ios_embedded': {
	175	'INNERTUBE_CONTEXT': {
	176	'client': {
	177	'clientName': 'IOS_MESSAGES_EXTENSION',
	178	'clientVersion': '16.46',
	179	'deviceModel': 'iPhone14,3',
	180	},
	181	},
	182	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	183	'REQUIRE_JS_PLAYER': False
	184	},
	185	'ios_music': {
	186	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	187	'INNERTUBE_CONTEXT': {
	188	'client': {
	189	'clientName': 'IOS_MUSIC',
	190	'clientVersion': '4.57',
	191	},
	192	},
	193	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	194	'REQUIRE_JS_PLAYER': False
	195	},
	196	'ios_creator': {
	197	'INNERTUBE_CONTEXT': {
	198	'client': {
	199	'clientName': 'IOS_CREATOR',
	200	'clientVersion': '21.47',
	201	},
	202	},
	203	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	204	'REQUIRE_JS_PLAYER': False
	205	},
	206	# mweb has 'ultralow' formats
	207	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	208	'mweb': {
	209	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	210	'INNERTUBE_CONTEXT': {
	211	'client': {
	212	'clientName': 'MWEB',
	213	'clientVersion': '2.20211221.01.00',
	214	}
	215	},
	216	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	217	},
	218	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	219	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	220	'tv_embedded': {
	221	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	222	'INNERTUBE_CONTEXT': {
	223	'client': {
	224	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	225	'clientVersion': '2.0',
	226	},
	227	},
	228	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	229	},
	230	}
	231
	232
	233	def _split_innertube_client(client_name):
	234	variant, *base = client_name.rsplit('.', 1)
	235	if base:
	236	return variant, base[0], variant
	237	base, *variant = client_name.split('_', 1)
	238	return client_name, base, variant[0] if variant else None
	239
	240
	241	def build_innertube_clients():
	242	THIRD_PARTY = {
	243	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	244	}
	245	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	246	priority = qualities(BASE_CLIENTS[::-1])
	247
	248	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	249	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	250	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	251	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	252	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	253
	254	_, base_client, variant = _split_innertube_client(client)
	255	ytcfg['priority'] = 10 * priority(base_client)
	256
	257	if not variant:
	258	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	259	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	260	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	261	embedscreen['priority'] -= 3
	262	elif variant == 'embedded':
	263	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	264	ytcfg['priority'] -= 2
	265	else:
	266	ytcfg['priority'] -= 3
	267
	268
	269	build_innertube_clients()
	270
	271
	272	class YoutubeBaseInfoExtractor(InfoExtractor):
	273	"""Provide base functions for Youtube extractors"""
	274
	275	_RESERVED_NAMES = (
	276	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	277	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	278	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	279	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	280
	281	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	282
	283	# _NETRC_MACHINE = 'youtube'
	284
	285	# If True it will raise an error if no login info is provided
	286	_LOGIN_REQUIRED = False
	287
	288	_INVIDIOUS_SITES = (
	289	# invidious-redirect websites
	290	r'(?:www\.)?redirect\.invidious\.io',
	291	r'(?:(?:www\|dev)\.)?invidio\.us',
	292	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	293	r'(?:www\.)?invidious\.pussthecat\.org',
	294	r'(?:www\.)?invidious\.zee\.li',
	295	r'(?:www\.)?invidious\.ethibox\.fr',
	296	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	297	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	298	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	299	# youtube-dl invidious instances list
	300	r'(?:(?:www\|no)\.)?invidiou\.sh',
	301	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	302	r'(?:www\.)?invidious\.kabi\.tk',
	303	r'(?:www\.)?invidious\.mastodon\.host',
	304	r'(?:www\.)?invidious\.zapashcanon\.fr',
	305	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	306	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	307	r'(?:www\.)?invidious\.himiko\.cloud',
	308	r'(?:www\.)?invidious\.reallyancient\.tech',
	309	r'(?:www\.)?invidious\.tube',
	310	r'(?:www\.)?invidiou\.site',
	311	r'(?:www\.)?invidious\.site',
	312	r'(?:www\.)?invidious\.xyz',
	313	r'(?:www\.)?invidious\.nixnet\.xyz',
	314	r'(?:www\.)?invidious\.048596\.xyz',
	315	r'(?:www\.)?invidious\.drycat\.fr',
	316	r'(?:www\.)?inv\.skyn3t\.in',
	317	r'(?:www\.)?tube\.poal\.co',
	318	r'(?:www\.)?tube\.connect\.cafe',
	319	r'(?:www\.)?vid\.wxzm\.sx',
	320	r'(?:www\.)?vid\.mint\.lgbt',
	321	r'(?:www\.)?vid\.puffyan\.us',
	322	r'(?:www\.)?yewtu\.be',
	323	r'(?:www\.)?yt\.elukerio\.org',
	324	r'(?:www\.)?yt\.lelux\.fi',
	325	r'(?:www\.)?invidious\.ggc-project\.de',
	326	r'(?:www\.)?yt\.maisputain\.ovh',
	327	r'(?:www\.)?ytprivate\.com',
	328	r'(?:www\.)?invidious\.13ad\.de',
	329	r'(?:www\.)?invidious\.toot\.koeln',
	330	r'(?:www\.)?invidious\.fdn\.fr',
	331	r'(?:www\.)?watch\.nettohikari\.com',
	332	r'(?:www\.)?invidious\.namazso\.eu',
	333	r'(?:www\.)?invidious\.silkky\.cloud',
	334	r'(?:www\.)?invidious\.exonip\.de',
	335	r'(?:www\.)?invidious\.riverside\.rocks',
	336	r'(?:www\.)?invidious\.blamefran\.net',
	337	r'(?:www\.)?invidious\.moomoo\.de',
	338	r'(?:www\.)?ytb\.trom\.tf',
	339	r'(?:www\.)?yt\.cyberhost\.uk',
	340	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	341	r'(?:www\.)?qklhadlycap4cnod\.onion',
	342	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	343	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	344	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	345	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	346	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	347	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	348	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	349	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	350	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	351	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	352	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	353	r'(?:www\.)?piped\.kavin\.rocks',
	354	r'(?:www\.)?piped\.silkky\.cloud',
	355	r'(?:www\.)?piped\.tokhmi\.xyz',
	356	r'(?:www\.)?piped\.moomoo\.me',
	357	r'(?:www\.)?il\.ax',
	358	r'(?:www\.)?piped\.syncpundit\.com',
	359	r'(?:www\.)?piped\.mha\.fi',
	360	r'(?:www\.)?piped\.mint\.lgbt',
	361	r'(?:www\.)?piped\.privacy\.com\.de',
	362	)
	363
	364	def _initialize_consent(self):
	365	cookies = self._get_cookies('https://www.youtube.com/')
	366	if cookies.get('__Secure-3PSID'):
	367	return
	368	consent_id = None
	369	consent = cookies.get('CONSENT')
	370	if consent:
	371	if 'YES' in consent.value:
	372	return
	373	consent_id = self._search_regex(
	374	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	375	if not consent_id:
	376	consent_id = random.randint(100, 999)
	377	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	378
	379	def _initialize_pref(self):
	380	cookies = self._get_cookies('https://www.youtube.com/')
	381	pref_cookie = cookies.get('PREF')
	382	pref = {}
	383	if pref_cookie:
	384	try:
	385	pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
	386	except ValueError:
	387	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	388	pref.update({'hl': 'en', 'tz': 'UTC'})
	389	self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
	390
	391	def _real_initialize(self):
	392	self._initialize_pref()
	393	self._initialize_consent()
	394	self._check_login_required()
	395
	396	def _check_login_required(self):
	397	if self._LOGIN_REQUIRED and not self._cookies_passed:
	398	self.raise_login_required('Login details are needed to download this content', method='cookies')
	399
	400	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s=\s({.+})\s;'
	401	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s=\s({.+})\s*;'
	402	_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta\|</script\|\n)'
	403
	404	def _get_default_ytcfg(self, client='web'):
	405	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	406
	407	def _get_innertube_host(self, client='web'):
	408	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	409
	410	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	411	# try_get but with fallback to default ytcfg client values when present
	412	_func = lambda y: try_get(y, getter, expected_type)
	413	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	414
	415	def _extract_client_name(self, ytcfg, default_client='web'):
	416	return self._ytcfg_get_safe(
	417	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	418	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
	419
	420	def _extract_client_version(self, ytcfg, default_client='web'):
	421	return self._ytcfg_get_safe(
	422	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	423	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
	424
	425	def _extract_api_key(self, ytcfg=None, default_client='web'):
	426	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
	427
	428	def _extract_context(self, ytcfg=None, default_client='web'):
	429	context = get_first(
	430	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	431	# Enforce language and tz for extraction
	432	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	433	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	434	return context
	435
	436	_SAPISID = None
	437
	438	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	439	time_now = round(time.time())
	440	if self._SAPISID is None:
	441	yt_cookies = self._get_cookies('https://www.youtube.com')
	442	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	443	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	444	sapisid_cookie = dict_get(
	445	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	446	if sapisid_cookie and sapisid_cookie.value:
	447	self._SAPISID = sapisid_cookie.value
	448	self.write_debug('Extracted SAPISID cookie')
	449	# SAPISID cookie is required if not already present
	450	if not yt_cookies.get('SAPISID'):
	451	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	452	self._set_cookie(
	453	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	454	else:
	455	self._SAPISID = False
	456	if not self._SAPISID:
	457	return None
	458	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	459	sapisidhash = hashlib.sha1(
	460	f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
	461	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	462
	463	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	464	note='Downloading API JSON', errnote='Unable to download API page',
	465	context=None, api_key=None, api_hostname=None, default_client='web'):
	466
	467	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	468	data.update(query)
	469	real_headers = self.generate_api_headers(default_client=default_client)
	470	real_headers.update({'content-type': 'application/json'})
	471	if headers:
	472	real_headers.update(headers)
	473	return self._download_json(
	474	f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',
	475	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	476	data=json.dumps(data).encode('utf8'), headers=real_headers,
	477	query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
	478
	479	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	480	data = self._search_regex(
	481	(fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}',
	482	self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
	483	if data:
	484	return self._parse_json(data, item_id, fatal=fatal)
	485
	486	@staticmethod
	487	def _extract_session_index(*data):
	488	"""
	489	Index of current account in account list.
	490	See: https://github.com/yt-dlp/yt-dlp/pull/519
	491	"""
	492	for ytcfg in data:
	493	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	494	if session_index is not None:
	495	return session_index
	496
	497	# Deprecated?
	498	def _extract_identity_token(self, ytcfg=None, webpage=None):
	499	if ytcfg:
	500	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)

1

import base64

import calendar

import copy

import datetime

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

from .common import InfoExtractor, SearchInfoExtractor

18

from ..compat import functools

19

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

25

compat_urllib_parse_urlencode,

26

compat_urllib_parse_urlparse,

27

compat_urlparse,

28

)

29

from ..jsinterp import JSInterpreter

30

from ..utils import (

NO_DEFAULT,

ExtractorError,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicity requested by the user

74

INNERTUBE_CLIENTS = {

75

'web': {

76

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

77

'INNERTUBE_CONTEXT': {

78

'client': {

79

'clientName': 'WEB',

80

'clientVersion': '2.20211221.00.00',

81

}

82

},

83

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

84

},

85

'web_embedded': {

86

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

87

'INNERTUBE_CONTEXT': {

88

'client': {

89

'clientName': 'WEB_EMBEDDED_PLAYER',

90

'clientVersion': '1.20211215.00.01',

91

},

92

},

93

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

94

},

95

'web_music': {

96

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

97

'INNERTUBE_HOST': 'music.youtube.com',

98

'INNERTUBE_CONTEXT': {

99

'client': {

100

'clientName': 'WEB_REMIX',

101

'clientVersion': '1.20211213.00.00',

102

}

103

},

104

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

105

},

106

'web_creator': {

107

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

108

'INNERTUBE_CONTEXT': {

109

'client': {

110

'clientName': 'WEB_CREATOR',

111

'clientVersion': '1.20211220.02.00',

112

}

113

},

114

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

115

},

116

'android': {

117

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

118

'INNERTUBE_CONTEXT': {

119

'client': {

120

'clientName': 'ANDROID',

121

'clientVersion': '16.49',

122

}

123

},

124

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

125

'REQUIRE_JS_PLAYER': False

126

},

127

'android_embedded': {

128

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

129

'INNERTUBE_CONTEXT': {

130

'client': {

131

'clientName': 'ANDROID_EMBEDDED_PLAYER',

132

'clientVersion': '16.49',

133

},

134

},

135

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

136

'REQUIRE_JS_PLAYER': False

137

},

138

'android_music': {

139

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

140

'INNERTUBE_CONTEXT': {

141

'client': {

142

'clientName': 'ANDROID_MUSIC',

143

'clientVersion': '4.57',

144

}

145

},

146

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

147

'REQUIRE_JS_PLAYER': False

148

},

149

'android_creator': {

150

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

151

'INNERTUBE_CONTEXT': {

152

'client': {

153

'clientName': 'ANDROID_CREATOR',

154

'clientVersion': '21.47',

155

},

156

},

157

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

158

'REQUIRE_JS_PLAYER': False

159

},

160

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

161

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

162

'ios': {

163

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

164

'INNERTUBE_CONTEXT': {

165

'client': {

166

'clientName': 'IOS',

167

'clientVersion': '16.46',

168

'deviceModel': 'iPhone14,3',

169

}

170

},

171

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

172

'REQUIRE_JS_PLAYER': False

173

},

174

'ios_embedded': {

175

'INNERTUBE_CONTEXT': {

176

'client': {

177

'clientName': 'IOS_MESSAGES_EXTENSION',

178

'clientVersion': '16.46',

179

'deviceModel': 'iPhone14,3',

180

},

181

},

182

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

183

'REQUIRE_JS_PLAYER': False

184

},

185

'ios_music': {

186

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

187

'INNERTUBE_CONTEXT': {

188

'client': {

189

'clientName': 'IOS_MUSIC',

190

'clientVersion': '4.57',

191

},

192

},

193

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

194

'REQUIRE_JS_PLAYER': False

195

},

196

'ios_creator': {

197

'INNERTUBE_CONTEXT': {

198

'client': {

199

'clientName': 'IOS_CREATOR',

200

'clientVersion': '21.47',

201

},

202

},

203

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

204

'REQUIRE_JS_PLAYER': False

205

},

206

# mweb has 'ultralow' formats

207

# See: https://github.com/yt-dlp/yt-dlp/pull/557

208

'mweb': {

209

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

210

'INNERTUBE_CONTEXT': {

211

'client': {

212

'clientName': 'MWEB',

213

'clientVersion': '2.20211221.01.00',

214

}

215

},

216

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

217

},

218

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

219

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

220

'tv_embedded': {

221

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

222

'INNERTUBE_CONTEXT': {

223

'client': {

224

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

225

'clientVersion': '2.0',

226

},

227

},

228

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

234

variant, *base = client_name.rsplit('.', 1)

235

if base:

236

return variant, base[0], variant

237

base, *variant = client_name.split('_', 1)

238

return client_name, base, variant[0] if variant else None

239

240

241

def build_innertube_clients():

242

THIRD_PARTY = {

243

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

244

}

245

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

246

priority = qualities(BASE_CLIENTS[::-1])

247

248

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

249

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

250

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

251

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

252

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

253

254

_, base_client, variant = _split_innertube_client(client)

255

ytcfg['priority'] = 10 * priority(base_client)

256

257

if not variant:

258

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

259

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

260

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

261

embedscreen['priority'] -= 3

262

elif variant == 'embedded':

263

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

264

ytcfg['priority'] -= 2

265

else:

266

ytcfg['priority'] -= 3

267

268

269

build_innertube_clients()

270

271

272

class YoutubeBaseInfoExtractor(InfoExtractor):

273

"""Provide base functions for Youtube extractors"""

274

275

_RESERVED_NAMES = (

276

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

282

283

# _NETRC_MACHINE = 'youtube'

284

285

# If True it will raise an error if no login info is provided

286

_LOGIN_REQUIRED = False

287

288

_INVIDIOUS_SITES = (

289

# invidious-redirect websites

290

r'(?:www\.)?redirect\.invidious\.io',

291

r'(?:(?:www|dev)\.)?invidio\.us',

292

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

293

r'(?:www\.)?invidious\.pussthecat\.org',

294

r'(?:www\.)?invidious\.zee\.li',

295

r'(?:www\.)?invidious\.ethibox\.fr',

296

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

297

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

298

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

299

# youtube-dl invidious instances list

300

r'(?:(?:www|no)\.)?invidiou\.sh',

301

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

302

r'(?:www\.)?invidious\.kabi\.tk',

303

r'(?:www\.)?invidious\.mastodon\.host',

304

r'(?:www\.)?invidious\.zapashcanon\.fr',

305

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

306

r'(?:www\.)?invidious\.tinfoil-hat\.net',

307

r'(?:www\.)?invidious\.himiko\.cloud',

308

r'(?:www\.)?invidious\.reallyancient\.tech',

309

r'(?:www\.)?invidious\.tube',

310

r'(?:www\.)?invidiou\.site',

311

r'(?:www\.)?invidious\.site',

312

r'(?:www\.)?invidious\.xyz',

313

r'(?:www\.)?invidious\.nixnet\.xyz',

314

r'(?:www\.)?invidious\.048596\.xyz',

315

r'(?:www\.)?invidious\.drycat\.fr',

316

r'(?:www\.)?inv\.skyn3t\.in',

317

r'(?:www\.)?tube\.poal\.co',

318

r'(?:www\.)?tube\.connect\.cafe',

319

r'(?:www\.)?vid\.wxzm\.sx',

320

r'(?:www\.)?vid\.mint\.lgbt',

321

r'(?:www\.)?vid\.puffyan\.us',

322

r'(?:www\.)?yewtu\.be',

323

r'(?:www\.)?yt\.elukerio\.org',

324

r'(?:www\.)?yt\.lelux\.fi',

325

r'(?:www\.)?invidious\.ggc-project\.de',

326

r'(?:www\.)?yt\.maisputain\.ovh',

327

r'(?:www\.)?ytprivate\.com',

328

r'(?:www\.)?invidious\.13ad\.de',

329

r'(?:www\.)?invidious\.toot\.koeln',

330

r'(?:www\.)?invidious\.fdn\.fr',

331

r'(?:www\.)?watch\.nettohikari\.com',

332

r'(?:www\.)?invidious\.namazso\.eu',

333

r'(?:www\.)?invidious\.silkky\.cloud',

334

r'(?:www\.)?invidious\.exonip\.de',

335

r'(?:www\.)?invidious\.riverside\.rocks',

336

r'(?:www\.)?invidious\.blamefran\.net',

337

r'(?:www\.)?invidious\.moomoo\.de',

338

r'(?:www\.)?ytb\.trom\.tf',

339

r'(?:www\.)?yt\.cyberhost\.uk',

340

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

341

r'(?:www\.)?qklhadlycap4cnod\.onion',

342

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

343

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

344

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

345

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

346

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

347

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

348

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

349

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

350

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

351

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

352

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

353

r'(?:www\.)?piped\.kavin\.rocks',

354

r'(?:www\.)?piped\.silkky\.cloud',

355

r'(?:www\.)?piped\.tokhmi\.xyz',

356

r'(?:www\.)?piped\.moomoo\.me',

357

r'(?:www\.)?il\.ax',

358

r'(?:www\.)?piped\.syncpundit\.com',

359

r'(?:www\.)?piped\.mha\.fi',

360

r'(?:www\.)?piped\.mint\.lgbt',

361

r'(?:www\.)?piped\.privacy\.com\.de',

362

)

363

364

def _initialize_consent(self):

365

cookies = self._get_cookies('https://www.youtube.com/')

366

if cookies.get('__Secure-3PSID'):

367

return

368

consent_id = None

369

consent = cookies.get('CONSENT')

370

if consent:

371

if 'YES' in consent.value:

372

return

373

consent_id = self._search_regex(

374

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

375

if not consent_id:

376

consent_id = random.randint(100, 999)

377

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

378

379

def _initialize_pref(self):

380

cookies = self._get_cookies('https://www.youtube.com/')

381

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))

386

except ValueError:

387

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

388

pref.update({'hl': 'en', 'tz': 'UTC'})

389

self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))

390

391

def _real_initialize(self):

392

self._initialize_pref()

393

self._initialize_consent()

394

self._check_login_required()

395

396

def _check_login_required(self):

397

if self._LOGIN_REQUIRED and not self._cookies_passed:

398

self.raise_login_required('Login details are needed to download this content', method='cookies')

399

400

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+})\s*;'

401

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+})\s*;'

402

_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

403

404

def _get_default_ytcfg(self, client='web'):

405

return copy.deepcopy(INNERTUBE_CLIENTS[client])

406

407

def _get_innertube_host(self, client='web'):

408

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

409

410

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

411

# try_get but with fallback to default ytcfg client values when present

412

_func = lambda y: try_get(y, getter, expected_type)

413

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

414

415

def _extract_client_name(self, ytcfg, default_client='web'):

416

return self._ytcfg_get_safe(

417

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

418

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

419

420

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from `ytcfg`, or from the default config of `default_client`."""
    getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)

424

425

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, or from the default config of `default_client`."""
    return self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_API_KEY'],
        expected_type=compat_str, default_client=default_client)

427

428

def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict from `ytcfg` (or from the default config
    of `default_client`) with English language and UTC timezone set on its client entry.
    NOTE: when the context comes from `ytcfg`, its 'client' dict is updated in place
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
    return context

_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for `origin`.

    The SAPISID value is read from the youtube.com cookies on first use and kept on
    self._SAPISID (False when no usable cookie was found).
    @returns the header value, or None when no SAPISID is available
    """
    time_now = round(time.time())
    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        sapisid_cookie = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (sapisid_cookie and sapisid_cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = sapisid_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
    if not self._SAPISID:
        return None
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    hash_input = f'{time_now} {self._SAPISID} {origin}'.encode()
    sapisidhash = hashlib.sha1(hash_input).hexdigest()
    return f'SAPISIDHASH {time_now}_{sapisidhash}'

462

463

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged with the innertube context) to the youtubei/v1/<ep> endpoint.

    @param ep              endpoint name appended to /youtubei/v1/
    @param query           dict merged into the JSON request body
    @param headers         extra headers, applied on top of the generated API headers
    @param context         innertube context; defaults to the context of `default_client`
    @param api_key         API key; defaults to the key of `default_client`
    @param api_hostname    host to call; defaults to the innertube host of `default_client`
    @returns whatever self._download_json returns for the request
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)
    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)
    return self._download_json(
        f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={
            # Fall back to the key of the same client whose context/host/headers are used
            'key': api_key or self._extract_api_key(default_client=default_client),
            'prettyPrint': 'false',
        })

478

479

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """
    Find the ytInitialData JSON in `webpage` and parse it.
    The boundary-anchored pattern is tried first, then the bare pattern.
    @returns the parsed JSON, or None when nothing was matched
    """
    patterns = (
        fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}',
        self._YT_INITIAL_DATA_RE)
    raw = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw:
        return None
    return self._parse_json(raw, item_id, fatal=fatal)

485

486

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    @returns the first 'SESSION_INDEX' among `data` that converts to an int, else None
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return 'ID_TOKEN' from `ytcfg` if present, otherwise search `webpage` for it; None if not found."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

507

508

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for data in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(data, datasync_getters, compat_str) or ''
        sync_ids = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(sync_ids) >= 2 and sync_ids[1]:
            return sync_ids[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)

536

537

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated; computed once per instance."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

540

541

def extract_ytcfg(self, video_id, webpage):
    """
    Parse the JSON object passed to the first `ytcfg.set({...});` call in `webpage`.
    @returns the parsed dict, or {} when `webpage` is empty, nothing matches or parsing fails
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped; `$` in their place only
    # matches at the end of the string and never matched an actual call
    ytcfg_json = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(ytcfg_json, video_id, fatal=False) or {}

548

549

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an innertube API request.
    Explicit arguments take precedence over the values extracted from `ytcfg`.
    @returns dict of headers; entries whose value is None are left out
    """
    host = api_hostname or self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With a syncid but no known session index, index 0 is sent
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return {name: value for name, value in headers.items() if value is not None}

573

574

def _download_ytcfg(self, client, video_id):
    """
    Download the page for `client` and extract its ytcfg.
    @returns the ytcfg dict, or {} for clients without a known page or on failure
    """
    client_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    url = client_pages.get(client)
    if not url:
        return {}
    client_label = client.replace("_", " ").strip()
    webpage = self._download_webpage(
        url, video_id, fatal=False, note=f'Downloading {client_label} client config')
    return self.extract_ytcfg(video_id, webpage) or {}

585

586

@staticmethod
def _build_api_continuation_query(continuation, ctp=None):
    """Return the query dict for a continuation request, with click tracking only when `ctp` is truthy."""
    # TODO: Inconsistency with clickTrackingParams.
    # Currently we have a fixed ctp contained within context (from ytcfg)
    # and a ctp in root query for continuation.
    click_tracking = {'clickTracking': {'clickTrackingParams': ctp}} if ctp else {}
    return {'continuation': continuation, **click_tracking}

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData /
    reloadContinuationData entry.
    @returns the query dict, or None when no such entry or no token exists
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    next_continuation = try_get(renderer, getters, dict)
    token = next_continuation.get('continuation') if next_continuation else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, next_continuation.get('clickTrackingParams'))

610

611

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.
    @returns the query dict, or None when the argument is not a dict or carries no token
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

620

621

@classmethod
def _extract_continuation(cls, renderer):
    """
    Find the continuation query for `renderer`: first from its own continuation data,
    then from the first continuationItemRenderer among its 'contents' / 'items'.
    @returns the query dict, or None when none is found
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    contents = [
        entry
        for key in ('contents', 'items')
        for entry in (try_get(renderer, lambda x, key=key: x[key], list) or [])]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for content in contents:
        if not isinstance(content, dict):
            continue
        continuation = cls._extract_continuation_ep_data(
            try_get(content, endpoint_getters, dict))
        if continuation:
            return continuation

@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every alert in data['alerts'] that has
    both a 'type' and a non-empty text. Malformed entries are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Skip non-dict values instead of failing on alert.get
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

654

655

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (alert_type, message) pairs.
    With `fatal`, alerts of type 'error' (case-insensitive) are errors: the last one is
    raised as ExtractorError, the others are reported as warnings like all remaining alerts.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        is_error = alert_type.lower() == 'error' and fatal
        (errors if is_error else warnings).append([alert_type, alert_message])

    # Every error but the last is only warned about; the last one is raised below
    for alert_type, alert_message in (warnings + errors[:-1]):
        self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

668

669

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and pass them, with any extra arguments, to _report_alerts."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

671

672

def _extract_badges(self, renderer: dict):
    """Return the set of lowercased metadataBadgeRenderer labels found in renderer['badges']."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths in `data`
    (or in `data` itself when no path is given).
    Text is taken from 'simpleText', else joined from the 'text' of the 'runs' entries.
    @param max_runs     when truthy, only the first `max_runs` runs are joined
    @returns the text, or None when nothing is found
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # The result is iterated as-is only when the path contains `...` or a
            # list/tuple key; otherwise it is treated as a single candidate
            has_multi_key = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not has_multi_key:
                candidates = [candidates]
        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                # The item may itself be the list of runs
                runs = item

            run_limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:run_limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined

def _get_count(self, data, *path_list):
    """
    Parse a count from the text found at `path_list`.
    parse_count is tried first; failing that, the leading run of digits and commas
    (after removing all whitespace) is converted with str_to_int.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width'
    """
    results = []
    for path in path_list or [()]:
        found = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for thumbnail in found:
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            height = int_or_none(thumbnail.get('height'))
            width = int_or_none(thumbnail.get('width'))
            results.append({'url': thumbnail_url, 'height': height, 'width': width})
    return results

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
    @returns the result of datetime_from_str, or None when the text holds no
             relative time or the time cannot be converted
    """
    # Either a keyword ('start') or an "<amount> <unit>[s] ago" expression ('time' + 'unit')
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None
    start = mobj.group('start')
    if start:
        return datetime_from_str(start)
    try:
        return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
    except ValueError:
        return None

def _extract_time_text(self, renderer, *path_list):
    """
    Read the time text at `path_list` and convert it to a timestamp: first as a
    relative time, then as an absolute date. Warns once when non-empty text cannot be parsed.
    @returns (timestamp, time_text)
    """
    text = self._get_text(renderer, *path_list) or ''
    dt = self.extract_relative_time(text)
    timestamp = calendar.timegm(dt.timetuple()) if isinstance(dt, datetime.datetime) else None

    if timestamp is None:
        date_patterns = (
            r'([a-z]+\s*\d{1,2},?\s*20\d{2})',
            r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)')
        timestamp = unified_timestamp(text)
        if not timestamp:
            timestamp = unified_timestamp(
                self._search_regex(date_patterns, text.lower(), 'time text', default=None))

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text

768

769

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API with retries (param 'extractor_retries', default 3).

    A request is retried on network errors (except HTTP 403 and 429), on an
    "unknown error" alert in the response, and when none of `check_get_keys`
    is present in the response.
    @returns the response; None when a non-fatal call fails
    """
    response = None
    last_error = None
    attempt = -1
    max_retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while attempt < max_retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as err:
            if isinstance(err.cause, network_exceptions):
                if isinstance(err.cause, compat_HTTPError):
                    first_bytes = err.cause.read(512)
                    if not is_html(first_bytes):
                        # A non-HTML error body may be JSON carrying an error message
                        error_json = self._parse_json(
                            self._webpage_read_content(err.cause, None, item_id, prefix=first_bytes) or '{}',
                            item_id, fatal=False)
                        yt_error = try_get(error_json, lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(err.cause, compat_HTTPError) or err.cause.code not in (403, 429):
                    last_error = error_to_compat_str(err.cause or err.msg)
                    if attempt < max_retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(err))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as err:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                if 'unknown error' in err.msg.lower():
                    last_error = err.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(err))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if attempt >= max_retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response

@staticmethod
def is_music_url(url):
    """Return True when `url` starts with http(s)://music.youtube.com/"""
    matched = re.match(r'https?://music\.youtube\.com/', url)
    return matched is not None

844

845

def _extract_video(self, renderer):
    """
    Convert a video renderer dict into a 'url'-type result pointing at YoutubeIE.
    Shorts get a /shorts/ URL, everything else a /watch URL.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration spelled out in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    # The upload date is only derived from the time text when 'approximate_date' is configured
    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

902

IE_DESC = 'YouTube'

903

_VALID_URL = r"""(?x)^

904

(

905

(?:https?://|//) # http(s):// or protocol-independent URL

906

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

907

(?:www\.)?deturl\.com/www\.youtube\.com|

908

(?:www\.)?pwnyoutube\.com|

909

(?:www\.)?hooktube\.com|

910

(?:www\.)?yourepeat\.com|

911

tube\.majestyc\.net|

912

%(invidious)s|

913

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

914

(?:.*?\#/)? # handle anchor (#/) redirect urls

915

(?: # the various things that can precede the ID:

916

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

917

|(?: # or the v= param in all its forms

918

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

919

(?:\?|\#!?) # the params delimiter ? or # or #!

920

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

926

vid\.plus| # or vid.plus/xxxx

927

zwearz\.com/watch| # or zwearz.com/watch/xxxx

928

%(invidious)s

929

)/

930

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

931

)

932

)? # all until now is optional -> you can pass the naked ID

933

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

934

(?(1).+)? # if we found the ID, everything can follow

935

(?:\#|$)""" % {

936

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

937

}

938

_PLAYER_INFO_RE = (

939

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

940

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

941

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

942

)

943

_formats = {

944

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

945

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

946

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

947

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

948

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

949

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

950

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

951

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

952

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

953

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

954

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

955

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

956

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

957

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

958

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

959

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

960

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

961

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

966

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

967

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

968

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

969

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

970

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

971

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

972

973

# Apple HTTP Live Streaming

974

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

975

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

976

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

977

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

978

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

979

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

980

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

981

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

982

983

# DASH mp4 video

984

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

985

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

986

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

987

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

988

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

989

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

990

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

991

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

992

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

993

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

994

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

995

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

996

997

# Dash mp4 audio

998

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

999

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

1000

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

1001

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1002

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1003

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

1004

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

1005

1006

# Dash webm

1007

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1008

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1009

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1010

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1011

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1012

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1013

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1014

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1015

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1016

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1017

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1018

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1019

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1020

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1021

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1022

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1023

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1024

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1025

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1026

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1027

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1028

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1029

1030

# Dash webm audio

1031

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1032

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1033

1034

# Dash webm audio with opus inside

1035

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1036

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1037

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1038

1039

# RTMP (unnamed)

1040

'_rtmp': {'protocol': 'rtmp'},

1041

1042

# av01 video only formats sometimes served with "unknown" codecs

1043

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1044

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1045

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1046

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1047

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1048

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1049

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1050

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1051

}

1052

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1064

'uploader': 'Philipp Hagemeister',

1065

'uploader_id': 'phihag',

1066

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1067

'channel': 'Philipp Hagemeister',

1068

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1069

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1070

'upload_date': '20121002',

1071

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1072

'categories': ['Science & Technology'],

1073

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1078

'playable_in_embed': True,

1079

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1080

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1089

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1094

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1095

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1096

'uploader': 'SET India',

1097

'uploader_id': 'setindia',

1098

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1099

'age_limit': 18,

1100

},

1101

'skip': 'Private video',

1102

},

1103

{

1104

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1105

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1110

'uploader': 'Philipp Hagemeister',

1111

'uploader_id': 'phihag',

1112

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1113

'channel': 'Philipp Hagemeister',

1114

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1115

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1116

'upload_date': '20121002',

1117

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1118

'categories': ['Science & Technology'],

1119

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1124

'playable_in_embed': True,

1125

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1126

'live_status': 'not_live',

1127

'age_limit': 0,

1128

'channel_follower_count': int

1129

},

1130

'params': {

1131

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1136

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1141

'uploader_id': '8KVIDEO',

1142

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1143

'description': '',

1144

'uploader': '8KVIDEO',

1145

'title': 'UHDTV TEST 8K VIDEO.mp4'

1146

},

1147

'params': {

1148

'youtube_include_dash_manifest': True,

1149

'format': '141',

1150

},

1151

'skip': 'format 141 not served anymore',

1152

},

1153

# DASH manifest with encrypted signature

1154

{

1155

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1160

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1161

'duration': 244,

1162

'uploader': 'AfrojackVEVO',

1163

'uploader_id': 'AfrojackVEVO',

1164

'upload_date': '20131011',

1165

'abr': 129.495,

1166

'like_count': int,

1167

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1168

'playable_in_embed': True,

1169

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1170

'view_count': int,

1171

'track': 'The Spark',

1172

'live_status': 'not_live',

1173

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1174

'channel': 'Afrojack',

1175

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1176

'tags': 'count:19',

1177

'availability': 'public',

1178

'categories': ['Music'],

1179

'age_limit': 0,

1180

'alt_title': 'The Spark',

1181

'channel_follower_count': int

1182

},

1183

'params': {

1184

'youtube_include_dash_manifest': True,

1185

'format': '141/bestaudio[ext=m4a]',

1186

},

1187

},

1188

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1189

{

1190

'note': 'Embed allowed age-gate video',

1191

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1196

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1197

'duration': 142,

1198

'uploader': 'The Witcher',

1199

'uploader_id': 'WitcherGame',

1200

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1201

'upload_date': '20140605',

1202

'age_limit': 18,

1203

'categories': ['Gaming'],

1204

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1205

'availability': 'needs_auth',

1206

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1207

'like_count': int,

1208

'channel': 'The Witcher',

1209

'live_status': 'not_live',

1210

'tags': 'count:17',

1211

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1212

'playable_in_embed': True,

1213

'view_count': int,

1214

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1219

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1224

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1225

'upload_date': '20200408',

1226

'uploader_id': 'FlyingKitty900',

1227

'uploader': 'FlyingKitty',

1228

'age_limit': 18,

1229

'availability': 'needs_auth',

1230

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1231

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1232

'channel': 'FlyingKitty',

1233

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1234

'view_count': int,

1235

'categories': ['Entertainment'],

1236

'live_status': 'not_live',

1237

'tags': ['Flyingkitty', 'godzilla 2'],

1238

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1239

'like_count': int,

1240

'duration': 177,

1241

'playable_in_embed': True,

1242

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1247

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1248

'info_dict': {

1249

'id': 'Tq92D6wQ1mg',

1250

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1251

'ext': 'mp4',

1252

'upload_date': '20191228',

1253

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1254

'uploader': 'Projekt Melody',

1255

'description': 'md5:17eccca93a786d51bc67646756894066',

1256

'age_limit': 18,

1257

'like_count': int,

1258

'availability': 'needs_auth',

1259

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1260

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1261

'view_count': int,

1262

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1263

'channel': 'Projekt Melody',

1264

'live_status': 'not_live',

1265

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1266

'playable_in_embed': True,

1267

'categories': ['Entertainment'],

1268

'duration': 106,

1269

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1270

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1275

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1280

'uploader': 'Herr Lurik',

1281

'uploader_id': 'st3in234',

1282

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1283

'upload_date': '20130730',

1284

'track': 'Such mich find mich',

1285

'age_limit': 0,

1286

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1287

'like_count': int,

1288

'playable_in_embed': False,

1289

'creator': 'OOMPH!',

1290

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1291

'view_count': int,

1292

'alt_title': 'Such mich find mich',

1293

'duration': 210,

1294

'channel': 'Herr Lurik',

1295

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1296

'categories': ['Music'],

1297

'availability': 'public',

1298

'uploader_url': 'http://www.youtube.com/user/st3in234',

1299

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1300

'live_status': 'not_live',

1301

'artist': 'OOMPH!',

1302

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1307

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1308

'only_matching': True,

1309

},

1310

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1311

# YouTube Red ad is not captured for creator

1312

{

1313

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1319

'uploader_id': 'deadmau5',

1320

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1321

'creator': 'deadmau5',

1322

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1323

'uploader': 'deadmau5',

1324

'title': 'Deadmau5 - Some Chords (HD)',

1325

'alt_title': 'Some Chords',

1326

'availability': 'public',

1327

'tags': 'count:14',

1328

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1329

'view_count': int,

1330

'live_status': 'not_live',

1331

'channel': 'deadmau5',

1332

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1333

'like_count': int,

1334

'track': 'Some Chords',

1335

'artist': 'deadmau5',

1336

'playable_in_embed': True,

1337

'age_limit': 0,

1338

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1339

'categories': ['Music'],

1340

'album': 'Some Chords',

1341

'channel_follower_count': int

1342

},

1343

'expected_warnings': [

1344

'DASH manifest missing',

1345

]

1346

},

1347

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1348

{

1349

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1355

'uploader_id': 'olympic',

1356

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1357

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1358

'uploader': 'Olympics',

1359

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1360

'like_count': int,

1361

'release_timestamp': 1343767800,

1362

'playable_in_embed': True,

1363

'categories': ['Sports'],

1364

'release_date': '20120731',

1365

'channel': 'Olympics',

1366

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1367

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1368

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1369

'age_limit': 0,

1370

'availability': 'public',

1371

'live_status': 'was_live',

1372

'view_count': int,

1373

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1374

'channel_follower_count': int

1375

},

1376

'params': {

1377

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1387

'duration': 85,

1388

'upload_date': '20110310',

1389

'uploader_id': 'AllenMeow',

1390

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1391

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1392

'uploader': '孫ᄋᄅ',

1393

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1394

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1399

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1400

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1401

'view_count': int,

1402

'categories': ['People & Blogs'],

1403

'like_count': int,

1404

'live_status': 'not_live',

1405

'availability': 'unlisted',

1406

'channel_follower_count': int

1407

},

1408

},

1409

# url_encoded_fmt_stream_map is empty string

1410

{

1411

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1416

'description': '',

1417

'upload_date': '20150404',

1418

'uploader_id': 'spbelect',

1419

'uploader': 'Наблюдатели Петербурга',

1420

},

1421

'params': {

1422

'skip_download': 'requires avconv',

1423

},

1424

'skip': 'This live event has ended.',

1425

},

1426

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1427

{

1428

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1433

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1434

'duration': 220,

1435

'upload_date': '20150625',

1436

'uploader_id': 'dorappi2000',

1437

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1438

'uploader': 'dorappi2000',

1439

'formats': 'mincount:31',

1440

},

1441

'skip': 'not actual anymore',

1442

},

1443

# DASH manifest with segment_list

1444

{

1445

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1446

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1451

'uploader': 'Airtek',

1452

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1453

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1454

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1455

},

1456

'params': {

1457

'youtube_include_dash_manifest': True,

1458

'format': '135', # bestvideo

1459

},

1460

'skip': 'This live event has ended.',

1461

},

1462

{

1463

# Multifeed videos (multiple cameras), URL is for Main Camera

1464

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1465

'info_dict': {

1466

'id': 'jvGDaLqkpTg',

1467

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1468

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1475

'description': 'md5:e03b909557865076822aa169218d6a5d',

1476

'duration': 10643,

1477

'upload_date': '20161111',

1478

'uploader': 'Team PGP',

1479

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1480

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1487

'description': 'md5:e03b909557865076822aa169218d6a5d',

1488

'duration': 10991,

1489

'upload_date': '20161111',

1490

'uploader': 'Team PGP',

1491

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1492

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1499

'description': 'md5:e03b909557865076822aa169218d6a5d',

1500

'duration': 10995,

1501

'upload_date': '20161111',

1502

'uploader': 'Team PGP',

1503

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1504

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1511

'description': 'md5:e03b909557865076822aa169218d6a5d',

1512

'duration': 10990,

1513

'upload_date': '20161111',

1514

'uploader': 'Team PGP',

1515

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1516

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1521

},

1522

'skip': 'Not multifeed anymore',

1523

},

1524

{

1525

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1526

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1527

'info_dict': {

1528

'id': 'gVfLd0zydlo',

1529

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1530

},

1531

'playlist_count': 2,

1532

'skip': 'Not multifeed anymore',

1533

},

1534

{

1535

'url': 'https://vid.plus/FlRa-iH7PGw',

1536

'only_matching': True,

1537

},

1538

{

1539

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1540

'only_matching': True,

1541

},

1542

{

1543

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1544

# Also tests cut-off URL expansion in video description (see

1545

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1546

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1547

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1552

'alt_title': 'Dark Walk',

1553

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1554

'duration': 133,

1555

'upload_date': '20151119',

1556

'uploader_id': 'IronSoulElf',

1557

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1558

'uploader': 'IronSoulElf',

1559

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1560

'track': 'Dark Walk',

1561

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1562

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1563

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1564

'categories': ['Film & Animation'],

1565

'view_count': int,

1566

'live_status': 'not_live',

1567

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1568

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1569

'tags': 'count:13',

1570

'availability': 'public',

1571

'channel': 'IronSoulElf',

1572

'playable_in_embed': True,

1573

'like_count': int,

1574

'age_limit': 0,

1575

'channel_follower_count': int

1576

},

1577

'params': {

1578

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1583

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1584

'only_matching': True,

1585

},

1586

{

1587

# Video with yt:stretch=17:0

1588

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1593

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1594

'upload_date': '20151107',

1595

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1596

'uploader': 'CH GAMER DROID',

1597

},

1598

'params': {

1599

'skip_download': True,

1600

},

1601

'skip': 'This video does not exist.',

1602

},

1603

{

1604

# Video with incomplete 'yt:stretch=16:'

1605

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1606

'only_matching': True,

1607

},

1608

{

1609

# Video licensed under Creative Commons

1610

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1615

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1616

'duration': 721,

1617

'upload_date': '20150128',

1618

'uploader_id': 'BerkmanCenter',

1619

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1620

'uploader': 'The Berkman Klein Center for Internet & Society',

1621

'license': 'Creative Commons Attribution license (reuse allowed)',

1622

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1623

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1624

'like_count': int,

1625

'age_limit': 0,

1626

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1627

'channel': 'The Berkman Klein Center for Internet & Society',

1628

'availability': 'public',

1629

'view_count': int,

1630

'categories': ['Education'],

1631

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1632

'live_status': 'not_live',

1633

'playable_in_embed': True,

1634

'channel_follower_count': int

1635

},

1636

'params': {

1637

'skip_download': True,

},

},

{

# Channel-like uploader_url

1642

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1647

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1648

'duration': 4060,

1649

'upload_date': '20151120',

1650

'uploader': 'Bernie Sanders',

1651

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1652

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1653

'license': 'Creative Commons Attribution license (reuse allowed)',

1654

'playable_in_embed': True,

1655

'tags': 'count:12',

1656

'like_count': int,

1657

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1658

'age_limit': 0,

1659

'availability': 'public',

1660

'categories': ['News & Politics'],

1661

'channel': 'Bernie Sanders',

1662

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1663

'view_count': int,

1664

'live_status': 'not_live',

1665

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1666

'channel_follower_count': int

1667

},

1668

'params': {

1669

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1674

'only_matching': True,

1675

},

1676

{

1677

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1678

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1679

'only_matching': True,

1680

},

1681

{

1682

# Rental video preview

1683

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1688

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1689

'upload_date': '20150811',

1690

'uploader': 'FlixMatrix',

1691

'uploader_id': 'FlixMatrixKaravan',

1692

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1693

'license': 'Standard YouTube License',

1694

},

1695

'params': {

1696

'skip_download': True,

1697

},

1698

'skip': 'This video is not available.',

1699

},

1700

{

1701

# YouTube Red video with episode data

1702

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1707

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1708

'duration': 2085,

1709

'upload_date': '20170118',

1710

'uploader': 'Vsauce',

1711

'uploader_id': 'Vsauce',

1712

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1713

'series': 'Mind Field',

1714

'season_number': 1,

1715

'episode_number': 1,

1716

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1717

'tags': 'count:12',

1718

'view_count': int,

1719

'availability': 'public',

1720

'age_limit': 0,

1721

'channel': 'Vsauce',

1722

'episode': 'Episode 1',

1723

'categories': ['Entertainment'],

1724

'season': 'Season 1',

1725

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1726

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1727

'like_count': int,

1728

'playable_in_embed': True,

1729

'live_status': 'not_live',

1730

'channel_follower_count': int

1731

},

1732

'params': {

1733

'skip_download': True,

1734

},

1735

'expected_warnings': [

1736

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1741

# as inappropriate or offensive to some audiences.

1742

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1747

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1748

'duration': 965,

1749

'upload_date': '20140124',

1750

'uploader': 'New Century Foundation',

1751

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1752

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1753

},

1754

'params': {

1755

'skip_download': True,

1756

},

1757

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1762

'only_matching': True,

1763

},

1764

{

1765

# geo restricted to JP

1766

'url': 'sJL6WA-aGkQ',

1767

'only_matching': True,

1768

},

1769

{

1770

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1771

'only_matching': True,

1772

},

1773

{

1774

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1775

'only_matching': True,

1776

},

1777

{

1778

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1779

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1780

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1785

'only_matching': True,

1786

},

1787

{

1788

# Video with unsupported adaptive stream type formats

1789

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1794

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1795

'duration': 433,

1796

'upload_date': '20130923',

1797

'uploader': 'Amelia Putri Harwita',

1798

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1799

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1800

'formats': 'maxcount:10',

1801

},

1802

'params': {

1803

'skip_download': True,

1804

'youtube_include_dash_manifest': False,

1805

},

1806

'skip': 'not actual anymore',

1807

},

1808

{

1809

# YouTube Music auto-generated description

1810

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1815

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1816

'upload_date': '20190312',

1817

'uploader': 'Stephen - Topic',

1818

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1819

'artist': 'Stephen',

1820

'track': 'Voyeur Girl',

1821

'album': 'it\'s too much love to know my dear',

1822

'release_date': '20190313',

1823

'release_year': 2019,

1824

'alt_title': 'Voyeur Girl',

1825

'view_count': int,

1826

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1827

'playable_in_embed': True,

1828

'like_count': int,

1829

'categories': ['Music'],

1830

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1831

'channel': 'Stephen',

1832

'availability': 'public',

1833

'creator': 'Stephen',

1834

'duration': 169,

1835

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1836

'age_limit': 0,

1837

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1838

'tags': 'count:11',

1839

'live_status': 'not_live',

1840

'channel_follower_count': int

1841

},

1842

'params': {

1843

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1848

'only_matching': True,

1849

},

1850

{

1851

# invalid -> valid video id redirection

1852

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1857

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1858

'upload_date': '20090125',

1859

'uploader': 'Prochorowka',

1860

'uploader_id': 'Prochorowka',

1861

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1862

'artist': 'Panjabi MC',

1863

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1864

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1865

},

1866

'params': {

1867

'skip_download': True,

1868

},

1869

'skip': 'Video unavailable',

1870

},

1871

{

1872

# empty description results in an empty string

1873

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1880

'uploader_id': 'ElevageOrVert',

1881

'uploader': 'ElevageOrVert',

1882

'view_count': int,

1883

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1884

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1885

'like_count': int,

1886

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1887

'tags': [],

1888

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1889

'availability': 'public',

1890

'age_limit': 0,

1891

'categories': ['Pets & Animals'],

1892

'duration': 7,

1893

'playable_in_embed': True,

1894

'live_status': 'not_live',

1895

'channel': 'ElevageOrVert',

1896

'channel_follower_count': int

1897

},

1898

'params': {

1899

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1904

# see [2] for an example with '};' inside ytInitialPlayerResponse

1905

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1906

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1907

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1912

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1913

'upload_date': '20130831',

1914

'uploader_id': 'kudvenkat',

1915

'uploader': 'kudvenkat',

1916

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1917

'like_count': int,

1918

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1919

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1920

'live_status': 'not_live',

1921

'categories': ['Education'],

1922

'availability': 'public',

1923

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1924

'tags': 'count:12',

1925

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1930

'channel_follower_count': int

1931

},

1932

'params': {

1933

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1938

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1939

'only_matching': True,

1940

},

1941

{

1942

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1943

'only_matching': True,

1944

},

1945

{

1946

# https://github.com/ytdl-org/youtube-dl/pull/28094

1947

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1953

'upload_date': '20141120',

1954

'uploader': 'The Cinematic Orchestra - Topic',

1955

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1956

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1957

'artist': 'The Cinematic Orchestra',

1958

'track': 'Burn Out',

1959

'album': 'Every Day',

1960

'like_count': int,

1961

'live_status': 'not_live',

1962

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1967

'creator': 'The Cinematic Orchestra',

1968

'channel': 'The Cinematic Orchestra',

1969

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1970

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1971

'availability': 'public',

1972

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1973

'categories': ['Music'],

1974

'playable_in_embed': True,

1975

'channel_follower_count': int

1976

},

1977

'params': {

1978

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1983

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1984

'only_matching': True,

1985

},

1986

{

1987

# controversial video, requires bpctr/contentCheckOk

1988

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1993

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1994

'uploader': 'CBS Mornings',

1995

'uploader_id': 'CBSThisMorning',

1996

'upload_date': '20140716',

1997

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1998

'duration': 170,

1999

'categories': ['News & Politics'],

2000

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2001

'view_count': int,

2002

'channel': 'CBS Mornings',

2003

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2004

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2005

'age_limit': 18,

2006

'availability': 'needs_auth',

2007

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2008

'like_count': int,

2009

'live_status': 'not_live',

2010

'playable_in_embed': True,

2011

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2016

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2021

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2022

'upload_date': '20201120',

2023

'uploader': 'Walk around Japan',

2024

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2025

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2026

'duration': 1456,

2027

'categories': ['Travel & Events'],

2028

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2029

'view_count': int,

2030

'channel': 'Walk around Japan',

2031

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2032

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2033

'age_limit': 0,

2034

'availability': 'public',

2035

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2036

'live_status': 'not_live',

2037

'playable_in_embed': True,

2038

'channel_follower_count': int

2039

},

2040

'params': {

2041

'skip_download': True,

2042

},

2043

}, {

2044

# Has multiple audio streams

2045

'url': 'WaOKSUlf4TM',

2046

'only_matching': True

2047

}, {

2048

# Requires Premium: has format 141 when requested using YTM url

2049

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2050

'only_matching': True

2051

}, {

2052

# multiple subtitles with same lang_code

2053

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2054

'only_matching': True,

2055

}, {

2056

# Force use android client fallback

2057

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2058

'info_dict': {

2059

'id': 'YOelRv7fMxY',

2060

'title': 'DIGGING A SECRET TUNNEL Part 1',

2061

'ext': '3gp',

2062

'upload_date': '20210624',

2063

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2064

'uploader': 'colinfurze',

2065

'uploader_id': 'colinfurze',

2066

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2067

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2068

'duration': 596,

2069

'categories': ['Entertainment'],

2070

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2071

'view_count': int,

2072

'channel': 'colinfurze',

2073

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2074

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2075

'age_limit': 0,

2076

'availability': 'public',

2077

'like_count': int,

2078

'live_status': 'not_live',

2079

'playable_in_embed': True,

2080

'channel_follower_count': int

2081

},

2082

'params': {

2083

'format': '17', # 3gp format available on android

2084

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2089

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2090

'only_matching': True,

2091

'params': {

2092

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2097

'only_matching': True,

2098

}, {

2099

'note': 'Storyboards',

2100

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2106

'uploader_id': 'scishow',

2107

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2108

'upload_date': '20140324',

2109

'uploader': 'SciShow',

2110

'like_count': int,

2111

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2112

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2113

'view_count': int,

2114

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2115

'playable_in_embed': True,

2116

'tags': 'count:12',

2117

'uploader_url': 'http://www.youtube.com/user/scishow',

2118

'availability': 'public',

2119

'channel': 'SciShow',

2120

'live_status': 'not_live',

2121

'duration': 248,

2122

'categories': ['Education'],

2123

'age_limit': 0,

2124

'channel_follower_count': int

2125

}, 'params': {'format': 'mhtml', 'skip_download': True}

2126

}, {

2127

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2128

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2133

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2134

'uploader': 'Leon Nguyen',

2135

'uploader_id': 'VNSXIII',

2136

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2137

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2138

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2143

'tags': 'count:23',

2144

'playable_in_embed': True,

2145

'live_status': 'not_live',

2146

'upload_date': '20220103',

2147

'like_count': int,

2148

'availability': 'public',

2149

'channel': 'Leon Nguyen',

2150

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2151

'channel_follower_count': int

2152

}

2153

}, {

2154

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2155

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2160

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2161

'uploader': 'Quackity',

2162

'uploader_id': 'QuackityHQ',

2163

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2164

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2165

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2170

'tags': 'count:26',

2171

'playable_in_embed': True,

2172

'live_status': 'not_live',

2173

'release_timestamp': 1641172509,

2174

'release_date': '20220103',

2175

'upload_date': '20220103',

2176

'like_count': int,

2177

'availability': 'public',

2178

'channel': 'Quackity',

2179

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2180

'channel_follower_count': int

2181

}

2182

},

2183

{ # continuous livestream. Microformat upload date should be preferred.

2184

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2185

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2186

'info_dict': {

2187

'id': 'kgx4WGK0oNU',

2188

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2189

'ext': 'mp4',

2190

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2191

'availability': 'public',

2192

'age_limit': 0,

2193

'release_timestamp': 1637975704,

2194

'upload_date': '20210619',

2195

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2196

'live_status': 'is_live',

2197

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2198

'uploader': '阿鲍Abao',

2199

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2200

'channel': 'Abao in Tokyo',

2201

'channel_follower_count': int,

2202

'release_date': '20211127',

2203

'tags': 'count:39',

2204

'categories': ['People & Blogs'],

2205

'like_count': int,

2206

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2207

'view_count': int,

2208

'playable_in_embed': True,

2209

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2210

},

2211

'params': {'skip_download': True}

2212

}, {

2213

# Story. Requires specific player params to work.

2214

# Note: stories get removed after some period of time

2215

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2220

'view_count': int,

2221

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2222

'upload_date': '20220526',

2223

'categories': ['Education'],

2224

'title': 'Story',

2225

'channel': 'IT\'S HISTORY',

2226

'description': '',

2227

'uploader_id': 'BlastfromthePast',

2228

'duration': 12,

2229

'uploader': 'IT\'S HISTORY',

2230

'playable_in_embed': True,

2231

'age_limit': 0,

2232

'live_status': 'not_live',

2233

'tags': [],

2234

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2235

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2236

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2237

}

2238

}, {

2239

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2244

'upload_date': '20220323',

2245

'like_count': int,

2246

'availability': 'unlisted',

2247

'channel': 'nao20010128nao',

2248

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2249

'age_limit': 0,

2250

'uploader': 'nao20010128nao',

2251

'uploader_id': 'nao20010128nao',

2252

'categories': ['Music'],

2253

'view_count': int,

2254

'description': '',

2255

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2256

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2257

'live_status': 'not_live',

2258

'playable_in_embed': True,

2259

'channel_follower_count': int,

2260

'duration': 6,

2261

'tags': [],

2262

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

}

}

]

@classmethod
def suitable(cls, url):
    """Return False for URLs with a non-empty ``list`` query value; otherwise defer to the parent class."""
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super().suitable(url)

2275

2276

def __init__(self, *args, **kwargs):
    """Run the parent initialiser, then create the two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player code (filled by _load_player)
    # _player_cache: memoised signature/nsig functions and decrypted nsig values
    self._code_cache, self._player_cache = {}, {}

2280

2281

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Attach a live fragment generator to every format flagged ``is_from_start``.

    Each such format dict is mutated in place: ``is_live`` is set to True,
    ``protocol`` to ``'http_dash_segments_generator'`` and ``fragments`` to a
    partial of ``_live_dash_fragments`` bound to the format id,
    *live_start_time* and a manifest feed callback defined here.
    """
    lock = threading.Lock()

    is_live = True
    start_time = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download player responses only once `delay` seconds have elapsed
        # since the previous fetch; rebinds the shared formats/is_live state.
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with lock:
            refetch_manifest(format_id, delay)

        matching = next((fmt for fmt in formats if fmt['format_id'] == format_id), None)
        if matching:
            return matching['manifest_url'], matching['manifest_stream_number'], is_live

        if is_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt['is_live'] = True
        fmt['protocol'] = 'http_dash_segments_generator'
        fmt['fragments'] = functools.partial(
            self._live_dash_fragments, fmt['format_id'], live_start_time, mpd_feed)

2324

2325

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):

2326

FETCH_SPAN, MAX_DURATION = 5, 432000

2327

2328

mpd_url, stream_number, is_live = None, None, True

2329

2330

begin_index = 0

2331

download_start_time = ctx.get('start') or time.time()

2332

2333

lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION

2334

if lack_early_segments:

2335

self.report_warning(bug_reports_message(

2336

'Starting download from the last 120 hours of the live stream since '

2337

'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

2338

lack_early_segments = True

2339

2340

known_idx, no_fragment_score, last_segment_url = begin_index, 0, None

2341

fragments, fragment_base_url = None, None

2342

2343

def _extract_sequence_from_mpd(refresh_sequence, immediate):

2344

nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url

2345

# Obtain from MPD's maximum seq value

2346

old_mpd_url = mpd_url

2347

last_error = ctx.pop('last_error', None)

2348

expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403

2349

mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)

2350

or (mpd_url, stream_number, False))

2351

if not refresh_sequence:

2352

if expire_fast and not is_live:

2353

return False, last_seq

2354

elif old_mpd_url == mpd_url:

2355

return True, last_seq

2356

try:

2357

fmts, _ = self._extract_mpd_formats_and_subtitles(

2358

mpd_url, None, note=False, errnote=False, fatal=False)

2359

except ExtractorError:

2360

fmts = None

2361

if not fmts:

2362

no_fragment_score += 2

2363

return False, last_seq

2364

fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)

2365

fragments = fmt_info['fragments']

2366

fragment_base_url = fmt_info['fragment_base_url']

2367

assert fragment_base_url

2368

2369

_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))

2370

return True, _last_seq

2371

2372

while is_live:

2373

fetch_time = time.time()

2374

if no_fragment_score > 30:

2375

return

2376

if last_segment_url:

2377

# Obtain from "X-Head-Seqnum" header value from each segment

2378

try:

2379

urlh = self._request_webpage(

2380

last_segment_url, None, note=False, errnote=False, fatal=False)

2381

except ExtractorError:

2382

urlh = None

2383

last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))

2384

if last_seq is None:

2385

no_fragment_score += 2

2386

last_segment_url = None

2387

continue

2388

else:

2389

should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)

2390

no_fragment_score += 2

2391

if not should_continue:

2392

continue

2393

2394

if known_idx > last_seq:

2395

last_segment_url = None

continue

last_seq += 1

if begin_index < 0 and known_idx < 0:

2401

# skip from the start when it's negative value

2402

known_idx = last_seq + begin_index

2403

if lack_early_segments:

2404

known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))

2405

try:

2406

for idx in range(known_idx, last_seq):

2407

# do not update sequence here or you'll get skipped some part of it

2408

should_continue, _ = _extract_sequence_from_mpd(False, False)

2409

if not should_continue:

2410

known_idx = idx - 1

2411

raise ExtractorError('breaking out of outer loop')

2412

last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)

2413

yield {

2414

'url': last_segment_url,

2415

}

2416

if known_idx == last_seq:

2417

no_fragment_score += 5

2418

else:

2419

no_fragment_score = 0

2420

known_idx = last_seq

2421

except ExtractorError:

2422

continue

2423

2424

time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2425

2426

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in any of *ytcfgs*, or None.

    Looks for ``PLAYER_JS_URL`` first, then ``jsUrl`` under
    ``WEB_PLAYER_CONTEXT_CONFIGS``; the result is joined onto
    ``https://www.youtube.com``. *webpage* is accepted but not used here.
    """
    candidate = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    return urljoin('https://www.youtube.com', candidate) if candidate else None

2433

2434

def _download_player_url(self, video_id, fatal=False):
    """Build the player ``base.js`` URL from the version found in the iframe API JS.

    Returns None when the download yields nothing or no player version is found.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2443

2444

def _signature_cache_id(self, example_sig):
    """Describe a signature by the lengths of its dot-separated parts, joined with dots."""
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)

2447

2448

@classmethod

2449

def _extract_player_info(cls, player_url):

2450

for player_re in cls._PLAYER_INFO_RE:

2451

id_m = re.search(player_re, player_url)

if id_m:

break

else:

raise ExtractorError('Cannot identify player %r' % player_url)

2456

return id_m.group('id')

2457

2458

def _load_player(self, video_id, player_url, fatal=True):

2459

player_id = self._extract_player_info(player_url)

2460

if player_id not in self._code_cache:

2461

code = self._download_webpage(

2462

player_url, video_id, fatal=fatal,

2463

note='Downloading player ' + player_id,

2464

errnote='Download of %s failed' % player_url)

2465

if code:

2466

self._code_cache[player_id] = code

2467

return self._code_cache.get(player_id)

2468

2469

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature shaped like *example_sig*.

    A cached index list from the ``youtube-sigfuncs`` filesystem cache is used
    when present. Otherwise the function is parsed out of the player code, and
    its effect on a probe string is stored in that cache as a list of indices.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        return lambda s: ''.join(s[i] for i in stored_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None

    sig_func = self._parse_sig_js(code)

    # Feed a string whose characters are their own positions, so the output
    # records where each output character comes from
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    probe_result = sig_func(probe)
    new_spec = [ord(c) for c in probe_result]

    self._downloader.cache.store('youtube-sigfuncs', func_id, new_spec)
    return sig_func

2490

2491

def _print_sig_code(self, func, example_sig):

2492

if not self.get_param('youtube_print_sig_code'):

2493

return

2494

2495

def gen_sig_code(idxs):

2496

def _genslice(start, end, step):

2497

starts = '' if start == 0 else str(start)

2498

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2499

steps = '' if step == 1 else (':%d' % step)

2500

return f's[{starts}{ends}{steps}]'

2501

2502

step = None

2503

# Quelch pyflakes warnings - start will be set when step is set

2504

start = '(Never used)'

2505

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2510

step = None

2511

continue

2512

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2522

2523

test_string = ''.join(map(compat_chr, range(len(example_sig))))

2524

cache_res = func(test_string)

2525

cache_spec = [ord(c) for c in cache_res]

2526

expr_code = ' + '.join(gen_sig_code(cache_spec))

2527

signature_id_tuple = '(%s)' % (

2528

', '.join(compat_str(len(p)) for p in example_sig.split('.')))

2529

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2530

' return %s\n') % (signature_id_tuple, expr_code)

2531

self.to_screen('Extracted signature function:\n' + code)

2532

2533

def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return a Python wrapper.

    The function name is found with the first matching pattern below, the
    function is extracted via JSInterpreter, and the returned callable takes
    a single string ``s`` and invokes the function with ``[s]``.
    """
    # NOTE: literal parentheses must stay escaped as \( and \); an unescaped
    # `$` in their place is an end-of-string anchor and the pattern never matches.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])

2556

2557

def _decrypt_signature(self, s, video_id, player_url):

2558

"""Turn the encrypted s field into a working signature"""

2559

try:

2560

player_id = (player_url, self._signature_cache_id(s))

2561

if player_id not in self._player_cache:

2562

func = self._extract_signature_function(video_id, player_url, s)

2563

self._player_cache[player_id] = func

2564

func = self._player_cache[player_id]

2565

self._print_sig_code(func, s)

2566

return func(s)

2567

except Exception as e:

2568

raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2569

2570

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    # Decrypted values are memoised per input string
    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        # The extracted n function is memoised per player URL
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        n_func = self._player_cache[func_key]
        self._player_cache[value_key] = n_func(s)
        self.write_debug(f'Decrypted nsig {s} => {self._player_cache[value_key]}')
        return self._player_cache[value_key]
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2590

2591

def _extract_n_function_name(self, jscode):

2592

nfunc, idx = self._search_regex(

2593

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

2594

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

2595

if not idx:

2596

return nfunc

2597

return json.loads(js_to_json(self._search_regex(

2598

rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,

2599

f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2600

2601

def _extract_n_function(self, video_id, player_url):
    """Return a callable applying the player's n function to a single string.

    The function code is loaded from the ``youtube-nsig`` cache when present;
    otherwise it is extracted from the player JS and stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    return lambda s: jsi.extract_function_from_code(*func_code)([s])

2618

2619

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):

2620

"""

2621

Extract signatureTimestamp (sts)

2622

Required to tell API what sig/player version is in use.

2623

"""

2624

sts = None

2625

if isinstance(ytcfg, dict):

2626

sts = int_or_none(ytcfg.get('STS'))

2627

2628

if not sts:

2629

# Attempt to extract from player

2630

if player_url is None:

2631

error_msg = 'Cannot extract signature timestamp without player_url.'

2632

if fatal:

2633

raise ExtractorError(error_msg)

2634

self.report_warning(error_msg)

2635

return

2636

code = self._load_player(video_id, player_url, fatal=fatal)

2637

if code:

2638

sts = int_or_none(self._search_regex(

2639

r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,

2640

'JS player signature timestamp', group='sts', fatal=fatal))

2641

return sts

2642

2643

def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL from the player responses (non-fatal).

    Sets ``ver=2`` and a random 16-character ``cpn`` in the URL's query before
    requesting it. Warns and returns if no playback URL is available.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2668

2669

@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embed URLs and video ids found in *webpage*.

    Three sources are scanned in order: player embeds (iframe/embed/object/
    SWFObject style), lazyYT ``data-youtube-id`` attributes, and the
    Wordpress "YouTube Video Importer" plugin's ``data-video_id`` attributes.
    """
    # Embedded YouTube player
    embed_matches = re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)
    entries = [unescapeHTML(mobj.group('url')) for mobj in embed_matches]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(unescapeHTML(lazy_id) for lazy_id in lazy_ids)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the video id is the last group
    entries.extend(groups[-1] for groups in importer_matches)

    return entries

@staticmethod
def _extract_url(webpage):
    """Return the first entry from ``YoutubeIE._extract_urls(webpage)``, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]

2705

2706

@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched against *url*.

    Raises ExtractorError when the URL does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2712

2713

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the ``chapteredPlayerBarRenderer`` entries in *data*.

    Start times come from ``timeRangeStartMillis`` (scaled by 1000) and titles
    from ``title.simpleText``; the list is passed to ``_extract_chapters``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_of(chapter):
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_of,
        chapter_title=title_of,
        duration=duration)

2727

2728

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return chapters from the first engagement panel that yields any, else ``[]``.

    Each ``macroMarkersListRenderer`` contents list is tried in order; start
    times are parsed from ``timeDescription`` and titles read from ``title``.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        # Stop at the first panel that produced a non-empty result
        if chapters:
            return chapters
    return []

@staticmethod
def _extract_chapters_from_description(description, duration):
    """Parse ``[H:]MM:SS title`` lines of *description* into a chapter list.

    A timestamp is accepted only if it parses to a non-zero value, has a
    title, is later than the previous accepted start and earlier than
    *duration*. Each chapter's ``end_time`` is the next chapter's start; the
    last chapter ends at *duration*.

    When *duration* is None the upper bound is treated as unbounded instead
    of raising TypeError on the comparison; the last ``end_time`` is then None.
    """
    upper_bound = math.inf if duration is None else duration
    # Placeholder entry so the first real chapter has something to compare to;
    # it is dropped from the result
    chapters = [{'start_time': 0}]
    for timestamp, title in re.findall(
            r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''):
        start = parse_duration(timestamp)
        if start and title and chapters[-1]['start_time'] < start < upper_bound:
            chapters[-1]['end_time'] = start
            chapters.append({
                'start_time': start,
                'title': title,
            })
    chapters[-1]['end_time'] = duration
    return chapters[1:]

2758

2759

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):

2760

chapters = []

2761

last_chapter = {'start_time': 0}

2762

for idx, chapter in enumerate(chapter_list or []):

2763

title = chapter_title(chapter)

2764

start_time = chapter_time(chapter)

2765

if start_time is None:

2766

continue

2767

last_chapter['end_time'] = start_time

2768

if start_time < last_chapter['start_time']:

2769

if idx == 1:

2770

chapters.pop()

2771

self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])

2772

else:

2773

self.report_warning(f'Invalid start time for chapter "{title}"')

2774

continue

2775

last_chapter = {'start_time': start_time, 'title': title}

2776

chapters.append(last_chapter)

2777

last_chapter['end_time'] = duration

2778

return chapters

2779

2780

def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search *webpage* for *regex* and parse the captured text as lenient, non-fatal JSON.

    The pattern is tried first with ``_YT_INITIAL_BOUNDARY_RE`` appended, then
    on its own; ``'{}'`` is parsed when neither matches.
    """
    patterns = (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}', regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False, lenient=True)

2784

2785

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    @param comment_renderer  Renderer dict of a single comment
    @param parent            ID of the parent comment; falsy for top-level comments
    @returns                 Comment dict, or ``None`` if the renderer has no ``commentId``
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer,
        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    # Either field may carry the count; fall back to 0 when neither parses
    votes = parse_count(try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'],
         lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(
        comment_renderer,
        lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }

2819

2820

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator yielding comment dicts, following continuations page by page.

    Called without `parent` for the comment section itself and recursively
    (with `parent` set to a comment id and the shared `tracker`) for the
    replies of a thread.

    @param root_continuation_data  Renderer from which the first continuation is read
    @param ytcfg                   Passed through to the API request helpers
    @param video_id                Video whose comments are being fetched
    @param parent                  ID of the parent comment when fetching replies
    @param tracker                 Dict of running counters shared across recursive calls
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Return the continuation for the requested sort order, if one is found."""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments of one page, each followed by its replies.

        Yields ``None`` once the limit on top-level comments is reached;
        the consumer below treats that as the signal to stop.
        """
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap by both the per-thread limit and what is left of the global reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing values are padded with '' and therefore default to sys.maxsize.
    # `max_comments` is not used in this function.
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section of the first response is treated as the header
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

2965

2966

@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id

    The video id is embedded twice in a fixed binary template, which is then
    base64-encoded and returned as a str.
    """
    token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
    return base64.b64encode(token.encode()).decode()

2973

2974

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Locates the item section identified as ``comment-item-section`` in
    `contents` and returns an iterator over its comments, truncated to the
    first value of the ``max_comments`` extractor argument (no limit when
    that value does not parse as an int). `webpage` is not used here.
    """
    def _real_comment_extract(contents):
        renderer = next((
            item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
            if item.get('sectionIdentifier') == 'comment-item-section'), None)
        yield from self._comment_entries(renderer, ytcfg, video_id)

    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, max_comments)

2984

2985

@staticmethod
def _get_checkok_params():
    """Return the ``contentCheckOk``/``racyCheckOk`` flags merged into player requests."""
    return {'contentCheckOk': True, 'racyCheckOk': True}

2988

2989

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player request.

    @param sts  Signature timestamp; included as ``signatureTimestamp``
                whenever it is not ``None``
    @returns    Dict with ``playbackContext`` plus the check-ok flags
    """
    context = {
        'html5Preference': 'HTML5_PREF_WANTS',
    }
    if sts is not None:
        context['signatureTimestamp'] = sts
    return {
        'playbackContext': {
            'contentPlaybackContext': context
        },
        **cls._get_checkok_params()
    }

@staticmethod
def _is_agegated(player_response):
    """Tell whether a player response indicates an age gate.

    True when ``playabilityStatus`` has a ``desktopLegacyAgeGateReason``, or
    when its ``status``/``reason`` contains one of the known age-gate
    markers. Matching is case-insensitive: the markers are lowercase while
    status values are compared in uppercase elsewhere in this file
    (e.g. ``'UNPLAYABLE'``).
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)

3014

3015

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the ``playabilityStatus`` status of the response is ``UNPLAYABLE``."""
    return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'

3018

3019

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the ``player`` endpoint for one client.

    @param client        Client name; also used (underscores replaced by
                         spaces) in the progress note
    @param player_url    When falsy, no signature timestamp is extracted
    @returns             The API response, or ``None`` if it is falsy
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB'  # enable stories
    }
    yt_query.update(self._generate_player_context(sts))
    return self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
    ) or None

3038

3039

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the ``player_client`` extractor argument into a client list.

    ``default`` expands to ``['android', 'web']`` and ``all`` to every client
    whose name does not start with ``_``, ordered by descending priority.
    Unknown names are skipped with a warning. For music URLs the ``*_music``
    variant of each requested client is added when it exists.

    @returns  The requested clients passed through ``orderedSet``
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Materialise the additions first so that the list is not extended
        # while it is being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3062

3063

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for the given clients, in order.

    Further clients may be queued while processing, depending on whether a
    response is unplayable or age-gated. Errors of individual clients are
    reported as warnings; the last error is raised only when no response
    at all was obtained.

    @param clients   Sequence of client names to try, first one first
    @param webpage   Watch page HTML, or a falsy value if not downloaded
    @returns         Tuple ``(player_responses, player_url)``
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    all_clients = set(clients)
    # Reversed so that `pop()` below takes the clients in the requested order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback download is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)

    return prs, player_url

3138

3139

def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Generator yielding format dicts from the given streaming data.

    Formats listed directly under ``formats``/``adaptiveFormats`` are
    yielded first, followed by the formats of the HLS and DASH manifests
    unless those are skipped by configuration.

    @param streaming_data  List of ``streamingData`` dicts
    @param player_url      Player JS URL passed to the signature/nsig decryption helpers
    @param is_live         Whether the video is currently live
    @param duration        Video length in seconds or ``None``; used to flag
                           formats whose own duration is much shorter
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        # NOTE(review): a falsy duration is short-circuited so the division cannot
        # fail on 0; whether try_get would swallow ZeroDivisionError is not visible here
        is_damaged = bool(duration) and try_get(
            fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # `quality` may still be None when neither quality field is present
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Copied because the list is appended to below
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign a unique format_id and a quality to `f`; False if it is a duplicate."""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen via another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Generator yielding one ``mhtml`` storyboard format per storyboard level.

    The spec string is split on ``|``: the first field is the URL template
    and every further field describes one level as 8 ``#``-separated values.
    Nothing is yielded when there is no usable base URL or when `duration`
    is ``None``, since the fragment durations are derived from it.

    @param player_responses  Player responses searched for the storyboard spec
    @param duration          Video length in seconds, or ``None`` if unknown
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() takes the first field of the spec
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if duration is None:
        # `duration / fragment_count` below would raise TypeError
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is a cols x rows sheet of frames; the last one may be partial
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3349

3350

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    The webpage download is skipped when ``webpage`` is listed in the
    ``player_skip`` extractor argument, and is non-fatal otherwise.

    @returns  Tuple ``(webpage, master_ytcfg, player_responses, player_url)``
    """
    webpage = None
    if 'webpage' not in self._configuration_arg('player_skip'):
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    # Fall back to the default config when none can be extracted from the page
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3363

3364

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status and extract all formats.

    ``is_live`` is taken from ``isLive`` of the video details, falling back
    to ``isLiveNow`` of the live broadcast details when the former is ``None``.

    @returns  Tuple ``(live_broadcast_details, is_live, streaming_data, formats)``
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator is consumed here, so its warnings are emitted at this point
    formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live, duration))

    return live_broadcast_details, is_live, streaming_data, formats

3374

3375

def _real_extract(self, url):

3376

url, smuggled_data = unsmuggle_url(url, {})

3377

video_id = self._match_id(url)

3378

3379

base_url = self.http_scheme() + '//www.youtube.com/'

3380

webpage_url = base_url + 'watch?v=' + video_id

3381

3382

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3383

3384

playability_statuses = traverse_obj(

3385

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3386

3387

trailer_video_id = get_first(

3388

playability_statuses,

3389

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3390

expected_type=str)

3391

if trailer_video_id:

3392

return self.url_result(

3393

trailer_video_id, self.ie_key(), trailer_video_id)

3394

3395

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3396

if webpage else (lambda x: None))

3397

3398

video_details = traverse_obj(

3399

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3400

microformats = traverse_obj(

3401

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3402

expected_type=dict, default=[])

3403

video_title = (

3404

get_first(video_details, 'title')

3405

or self._get_text(microformats, (..., 'title'))

3406

or search_meta(['og:title', 'twitter:title', 'title']))

3407

video_description = get_first(video_details, 'shortDescription')

3408

3409

multifeed_metadata_list = get_first(

3410

player_responses,

3411

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3412

expected_type=str)

3413

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3414

if self.get_param('noplaylist'):

3415

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3420

# Unquote should take place before split on comma (,) since textual

3421

# fields may contain comma as well (see

3422

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3423

feed_data = compat_parse_qs(

3424

compat_urllib_parse_unquote_plus(feed))

3425

3426

def feed_entry(name):

3427

return try_get(

3428

feed_data, lambda x: x[name][0], compat_str)

3429

3430

feed_id = feed_entry('id')

3431

if not feed_id:

3432

continue

3433

feed_title = feed_entry('title')

3434

title = video_title

3435

if feed_title:

3436

title += ' (%s)' % feed_title

3437

entries.append({

3438

'_type': 'url_transparent',

3439

'ie_key': 'Youtube',

3440

'url': smuggle_url(

3441

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3442

{'force_singlefeed': True}),

3443

'title': title,

3444

})

3445

feed_ids.append(feed_id)

3446

self.to_screen(

3447

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3448

% (', '.join(feed_ids), video_id))

3449

return self.playlist_result(

3450

entries, video_id, video_title, video_description)

3451

3452

duration = int_or_none(

3453

get_first(video_details, 'lengthSeconds')

3454

or get_first(microformats, 'lengthSeconds')

3455

or parse_duration(search_meta('duration'))) or None

3456

3457

if get_first(video_details, 'isPostLiveDvr'):

3458

self.write_debug('Video is in Post-Live Manifestless mode')

3459

if duration or 0 > 4 * 3600:

3460

self.report_warning(

3461

'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '

3462

'This is a known issue and patches are welcome')

3463

3464

live_broadcast_details, is_live, streaming_data, formats = self._list_formats(

3465

video_id, microformats, video_details, player_responses, player_url, duration)

3466

3467

if not formats:

3468

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3469

self.report_drm(video_id)

3470

pemr = get_first(

3471

playability_statuses,

3472

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3473

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3474

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3475

if subreason:

3476

if subreason == 'The uploader has not made this video available in your country.':

3477

countries = get_first(microformats, 'availableCountries')

3478

if not countries:

3479

regions_allowed = search_meta('regionsAllowed')

3480

countries = regions_allowed.split(',') if regions_allowed else None

3481

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3482

reason += f'. {subreason}'

3483

if reason:

3484

self.raise_no_formats(reason, expected=True)

3485

3486

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3487

if not keywords and webpage:

3488

keywords = [

3489

unescapeHTML(m.group('content'))

3490

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3491

for keyword in keywords:

3492

if keyword.startswith('yt:stretch='):

3493

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3494

if mobj:

3495

# NB: float is intentional for forcing float division

3496

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3501

f['stretched_ratio'] = ratio

3502

break

3503

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3504

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3505

if thumbnail_url:

3506

thumbnails.append({

3507

'url': thumbnail_url,

3508

})

3509

original_thumbnails = thumbnails.copy()

3510

3511

# The best resolution thumbnails sometimes does not appear in the webpage

3512

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3513

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3514

thumbnail_names = [

3515

# While the *1,*2,*3 thumbnails are just below their correspnding "*default" variants

3516

# in resolution, these are not the custom thumbnail. So de-prioritize them

3517

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3518

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3519

]

3520

n_thumbnail_names = len(thumbnail_names)

3521

thumbnails.extend({

3522

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3523

video_id=video_id, name=name, ext=ext,

3524

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3525

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3526

for thumb in thumbnails:

3527

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3528

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3529

self._remove_duplicate_formats(thumbnails)

3530

self._downloader._sort_thumbnails(original_thumbnails)

3531

3532

category = get_first(microformats, 'category') or search_meta('genre')

3533

channel_id = str_or_none(

3534

get_first(video_details, 'channelId')

3535

or get_first(microformats, 'externalChannelId')

3536

or search_meta('channelId'))

3537

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3538

3539

live_content = get_first(video_details, 'isLiveContent')

3540

is_upcoming = get_first(video_details, 'isUpcoming')

3541

if is_live is None:

3542

if is_upcoming or live_content is False:

3543

is_live = False

3544

if is_upcoming is None and (live_content or is_live):

3545

is_upcoming = False

3546

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3547

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3548

if not duration and live_end_time and live_start_time:

3549

duration = live_end_time - live_start_time

3550

3551

if is_live and self.get_param('live_from_start'):

3552

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3553

3554

formats.extend(self._extract_storyboard(player_responses, duration))

3555

3556

# Source is given priority since formats that throttle are given lower source_preference

3557

# When throttling issue is fully fixed, remove this

3558

self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3563

'formats': formats,

3564

'thumbnails': thumbnails,

3565

# The best thumbnail that we are sure exists. Prevents unnecessary

3566

# URL checking if user don't care about getting the best possible thumbnail

3567

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3568

'description': video_description,

3569

'uploader': get_first(video_details, 'author'),

3570

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3571

'uploader_url': owner_profile_url,

3572

'channel_id': channel_id,

3573

'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),

3574

'duration': duration,

3575

'view_count': int_or_none(

3576

get_first((video_details, microformats), (..., 'viewCount'))

3577

or search_meta('interactionCount')),

3578

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3579

'age_limit': 18 if (

3580

get_first(microformats, 'isFamilySafe') is False

3581

or search_meta('isFamilyFriendly') == 'false'

3582

or search_meta('og:restrictions:age') == '18+') else 0,

3583

'webpage_url': webpage_url,

3584

'categories': [category] if category else None,

3585

'tags': keywords,

3586

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3587

'is_live': is_live,

3588

'was_live': (False if is_live or is_upcoming or live_content is False

3589

else None if is_live is None or is_upcoming is None

3590

else live_content),

3591

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3592

'release_timestamp': live_start_time,

3593

}

3594

3595

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3596

if pctr:

3597

def get_lang_code(track):

3598

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3599

or track.get('languageCode'))

3600

3601

# Converted into dicts to remove duplicates

3602

captions = {

3603

get_lang_code(sub): sub

3604

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3605

translation_languages = {

3606

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3607

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3608

3609

def process_language(container, base_url, lang_code, sub_name, query):

3610

lang_subs = container.setdefault(lang_code, [])

3611

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

subtitles, automatic_captions = {}, {}

3622

for lang_code, caption_track in captions.items():

3623

base_url = caption_track.get('baseUrl')

3624

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3625

if not base_url:

3626

continue

3627

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3628

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3633

if not caption_track.get('isTranslatable'):

3634

continue

3635

for trans_code, trans_name in translation_languages.items():

3636

if not trans_code:

3637

continue

3638

orig_trans_code = trans_code

3639

if caption_track.get('kind') != 'asr':

3640

if 'translated_subs' in self._configuration_arg('skip'):

3641

continue

3642

trans_code += f'-{lang_code}'

3643

trans_name += format_field(lang_name, template=' from %s')

3644

# Add an "-orig" label to the original language so that it can be distinguished.

3645

# The subs are returned without "-orig" as well for compatibility

3646

if lang_code == f'a-{orig_trans_code}':

3647

process_language(

3648

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3649

# Setting tlang=lang returns damaged subtitles.

3650

process_language(automatic_captions, base_url, trans_code, trans_name,

3651

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3652

info['automatic_captions'] = automatic_captions

3653

info['subtitles'] = subtitles

3654

3655

parsed_url = compat_urllib_parse_urlparse(url)

3656

for component in [parsed_url.fragment, parsed_url.query]:

3657

query = compat_parse_qs(component)

3658

for k, v in query.items():

3659

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3660

d_k += '_time'

3661

if d_k not in info and k in s_ks:

3662

info[d_k] = parse_duration(query[k][0])

3663

3664

# Youtube Music Auto-generated description

3665

if video_description:

3666

mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)

3667

if mobj:

3668

release_year = mobj.group('release_year')

3669

release_date = mobj.group('release_date')

3670

if release_date:

3671

release_date = release_date.replace('-', '')

3672

if not release_year:

3673

release_year = release_date[:4]

3674

info.update({

3675

'album': mobj.group('album'.strip()),

3676

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3677

'track': mobj.group('track').strip(),

3678

'release_date': release_date,

3679

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self._extract_yt_initial_variable(

3685

webpage, self._YT_INITIAL_DATA_RE, video_id,

3686

'yt initial data')

3687

if not initial_data:

3688

query = {'videoId': video_id}

3689

query.update(self._get_checkok_params())

3690

initial_data = self._extract_response(

3691

item_id=video_id, ep='next', fatal=False,

3692

ytcfg=master_ytcfg, query=query,

3693

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3694

note='Downloading initial data API JSON')

3695

3696

try: # This will error if there is no livechat

3697

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3698

except (KeyError, IndexError, TypeError):

3699

pass

3700

else:

3701

info.setdefault('subtitles', {})['live_chat'] = [{

3702

'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies

3703

'video_id': video_id,

3704

'ext': 'json',

3705

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3711

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3712

or self._extract_chapters_from_description(video_description, duration)

3713

or None)

3714

3715

contents = traverse_obj(

3716

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3717

expected_type=list, default=[])

3718

3719

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3720

if vpir:

3721

stl = vpir.get('superTitleLink')

3722

if stl:

3723

stl = self._get_text(stl)

3724

if try_get(

3725

vpir,

3726

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3727

info['location'] = stl

3728

else:

3729

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

3730

if mobj:

3731

info.update({

3732

'series': mobj.group(1),

3733

'season_number': int(mobj.group(2)),

3734

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3739

list) or []):

3740

tbr = tlb.get('toggleButtonRenderer') or {}

3741

for getter, regex in [(

3742

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3743

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3744

lambda x: x['accessibility'],

3745

lambda x: x['accessibilityData']['accessibilityData'],

3746

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3747

label = (try_get(tbr, getter, dict) or {}).get('label')

3748

if label:

3749

mobj = re.match(regex, label)

3750

if mobj:

3751

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3752

break

3753

sbr_tooltip = try_get(

3754

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3755

if sbr_tooltip:

3756

like_count, dislike_count = sbr_tooltip.split(' / ')

3757

info.update({

3758

'like_count': str_to_int(like_count),

3759

'dislike_count': str_to_int(dislike_count),

3760

})

3761

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3762

if vsir:

3763

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3764

info.update({

3765

'channel': self._get_text(vor, 'title'),

3766

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3771

list) or []

3772

multiple_songs = False

3773

for row in rows:

3774

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3775

multiple_songs = True

3776

break

3777

for row in rows:

3778

mrr = row.get('metadataRowRenderer') or {}

3779

mrr_title = mrr.get('title')

3780

if not mrr_title:

3781

continue

3782

mrr_title = self._get_text(mrr, 'title')

3783

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3784

if mrr_title == 'License':

3785

info['license'] = mrr_contents_text

3786

elif not multiple_songs:

3787

if mrr_title == 'Album':

3788

info['album'] = mrr_contents_text

3789

elif mrr_title == 'Artist':

3790

info['artist'] = mrr_contents_text

3791

elif mrr_title == 'Song':

3792

info['track'] = mrr_contents_text

3793

3794

fallbacks = {

3795

'channel': 'uploader',

3796

'channel_id': 'uploader_id',

3797

'channel_url': 'uploader_url',

3798

}

3799

3800

# The upload date for scheduled, live and past live streams / premieres in microformats

3801

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3802

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3803

upload_date = (

3804

unified_strdate(get_first(microformats, 'uploadDate'))

3805

or unified_strdate(search_meta('uploadDate')))

3806

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3807

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date

3808

info['upload_date'] = upload_date

3809

3810

for to, frm in fallbacks.items():

3811

if not info.get(to):

3812

info[to] = info.get(frm)

3813

3814

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3820

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3821

is_membersonly = None

3822

is_premium = None

3823

if initial_data and is_private is not None:

3824

is_membersonly = False

3825

is_premium = False

3826

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3827

badge_labels = set()

3828

for content in contents:

3829

if not isinstance(content, dict):

3830

continue

3831

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3832

for badge_label in badge_labels:

3833

if badge_label.lower() == 'members only':

3834

is_membersonly = True

3835

elif badge_label.lower() == 'premium':

3836

is_premium = True

3837

elif badge_label.lower() == 'unlisted':

3838

is_unlisted = True

3839

3840

info['availability'] = self._availability(

3841

is_private=is_private,

3842

needs_premium=is_premium,

3843

needs_subscription=is_membersonly,

3844

needs_auth=info['age_limit'] >= 18,

3845

is_unlisted=None if is_private is None else is_unlisted)

3846

3847

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3848

3849

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3855

3856

@staticmethod

3857

def passthrough_smuggled_data(func):
    """Decorator that forwards smuggled URL data to the wrapped method and its entries.

    The wrapped method is invoked as ``func(self, url, smuggled_data)`` with the
    unsmuggled URL. ``is_music_url`` is set in the smuggled data whenever
    ``self.is_music_url(url)`` is true. If any smuggled data exists and the
    returned info dict has entries, each entry URL is re-smuggled lazily.
    """

    def _resmuggle(entries, payload):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, payload = unsmuggle_url(url, {})
        if self.is_music_url(url):
            payload['is_music_url'] = True
        result = func(self, url, payload)
        # Nothing to pass through when no data was smuggled or there are no entries
        if not payload or not result.get('entries'):
            return result
        result['entries'] = _resmuggle(result['entries'], payload)
        return result

    return wrapper

def _extract_channel_id(self, webpage):

3877

channel_id = self._html_search_meta(

3878

'channelId', webpage, 'channel id', default=None)

3879

if channel_id:

3880

return channel_id

3881

channel_url = self._html_search_meta(

3882

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

3883

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

3884

'twitter:app:url:googleplay'), webpage, 'channel url')

3885

return self._search_regex(

3886

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

3887

channel_url, 'channel id')

3888

3889

@staticmethod

3890

def _extract_basic_item_renderer(item):

3891

# Modified from _extract_grid_item_renderer

3892

known_basic_renderers = (

3893

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

3894

)

3895

for key, renderer in item.items():

3896

if not isinstance(renderer, dict):

3897

continue

3898

elif key in known_basic_renderers:

3899

return renderer

3900

elif key.startswith('grid') and key.endswith('Renderer'):

3901

return renderer

3902

3903

def _grid_entries(self, grid_renderer):

3904

for item in grid_renderer['items']:

3905

if not isinstance(item, dict):

3906

continue

3907

renderer = self._extract_basic_item_renderer(item)

3908

if not isinstance(renderer, dict):

3909

continue

3910

title = self._get_text(renderer, 'title')

3911

3912

# playlist

3913

playlist_id = renderer.get('playlistId')

3914

if playlist_id:

3915

yield self.url_result(

3916

'https://www.youtube.com/playlist?list=%s' % playlist_id,

3917

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

3922

if video_id:

3923

yield self._extract_video(renderer)

3924

continue

3925

# channel

3926

channel_id = renderer.get('channelId')

3927

if channel_id:

3928

yield self.url_result(

3929

'https://www.youtube.com/channel/%s' % channel_id,

3930

ie=YoutubeTabIE.ie_key(), video_title=title)

3931

continue

3932

# generic endpoint URL support

3933

ep_url = urljoin('https://www.youtube.com/', try_get(

3934

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

3935

compat_str))

3936

if ep_url:

3937

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

3938

if ie.suitable(ep_url):

3939

yield self.url_result(

3940

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

3941

break

3942

3943

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com url result for a music list item renderer.

    Tried in order: the item's own video id, the watch endpoint's playlist
    (together with its video id when one is present), then the browse
    endpoint. Returns None when none of these ids can be found.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        # Either way the result is keyed by the playlist id and handled by the tab extractor
        if video_id:
            url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)

3960

3961

def _shelf_entries_from_content(self, shelf_renderer):

3962

content = shelf_renderer.get('content')

3963

if not isinstance(content, dict):

3964

return

3965

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3966

if renderer:

3967

# TODO: add support for nested playlists so each shelf is processed

3968

# as separate playlist

3969

# TODO: this includes only first N items

3970

yield from self._grid_entries(renderer)

3971

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf: its endpoint URL (if any), then its inline content.

    When ``skip_channels`` is true and the shelf URL contains ``/channels?``,
    the shelf is skipped entirely and nothing is yielded.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3991

3992

def _playlist_entries(self, video_list_renderer):

3993

for content in video_list_renderer['contents']:

3994

if not isinstance(content, dict):

3995

continue

3996

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

3997

if not isinstance(renderer, dict):

3998

continue

3999

video_id = renderer.get('videoId')

4000

if not video_id:

4001

continue

4002

yield self._extract_video(renderer)

4003

4004

def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has a ``videoId``."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4011

4012

def _video_entry(self, video_renderer):

4013

video_id = video_renderer.get('videoId')

4014

if video_id:

4015

return self._extract_video(video_renderer)

4016

4017

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab url result for a hashtag tile, or None if it has no tap URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4023

4024

def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, then every video
    link in the post text that ``YoutubeIE`` accepts and that is not the
    attached video itself. Yields nothing when there is no
    ``backstagePostRenderer``.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video was already yielded above
        if video_id == ep_video_id:
            continue
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)

4059

4060

def _post_thread_continuation_entries(self, post_thread_continuation):

4061

contents = post_thread_continuation.get('contents')

4062

if not isinstance(contents, list):

4063

return

4064

for content in contents:

4065

renderer = content.get('backstagePostThreadRenderer')

4066

if not isinstance(renderer, dict):

4067

continue

4068

yield from self._post_thread_entries(renderer)

4069

4070

r''' # unused

4071

def _rich_grid_entries(self, contents):

4072

for content in contents:

4073

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4074

if video_renderer:

4075

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    ``continuation_list`` is an output parameter: it is reset to ``[None]``
    in place, and its single slot is set to the continuation extracted from
    the most specific renderer that provides one (item renderer, then section
    renderer, then the parent renderer).
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    known_renderers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Not a section: the only other supported shape is a rich item
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for isr_content in try_get(is_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(isr_content, dict):
                continue
            # Only the first recognised renderer of each item is processed
            for key, renderer in isr_content.items():
                handler = known_renderers.get(key)
                if handler is None:
                    continue
                # Handlers may produce None/empty entries; those are dropped
                yield from filter(None, handler(renderer))
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4128

4129

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of ``tab``, following continuations page by page.

    The tab's own content is processed first. Then, while a continuation is
    available, the next page is requested and handled either through
    ``continuationContents`` or through the first item of
    ``appendContinuationItemsAction``. Paging stops when there is no
    continuation, no response, or no recognised renderer in the response.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # continuation_list is looked up at call time, so the rebinding done
        # before each handler call below is picked up here
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for renderers found directly under `continuationContents`
    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # (handler, key under which the continuation items are passed to it)
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        matched_key = next(
            (key for key in continuation_contents if key in known_continuation_renderers), None)
        continuation_renderer = None
        if matched_key is not None:
            continuation_renderer = continuation_contents[matched_key]
            continuation_list = [None]
            yield from known_continuation_renderers[matched_key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole item list is handled
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        matched_key = next(
            (key for key in continuation_item if key in known_renderers), None)
        if matched_key is None:
            break
        handler, items_key = known_renderers[matched_key]
        video_items_renderer = {items_key: continuation_items}
        continuation_list = [None]
        yield from handler(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4206

for tab in tabs:

4207

renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}

4208

if renderer.get('selected') is True:

return renderer

else:

if fatal:

raise ExtractorError('Unable to find selected tab')

4213

4214

def _extract_uploader(self, data):
    """Return uploader fields taken from the playlist's secondary sidebar.

    The result may contain ``uploader``, ``uploader_id`` and ``uploader_url``,
    read from the first title run of the sidebar's ``videoOwnerRenderer``.
    Fields whose value is None are omitted; an empty dict is returned when no
    owner run is found.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # The regex isolates the first name from "by X and N others";
        # any other text is used unchanged
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {key: value for key, value in fields.items() if value is not None}

4229

4230

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for a tab-based page (channel, playlist, hashtag...).

    Metadata is read from ``channelMetadataRenderer`` or, failing that,
    ``playlistMetadataRenderer``; thumbnails come from the primary sidebar,
    the avatar and the header banners. Entries are produced lazily by
    ``_entries`` for the selected tab.
    """

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped:
            thumbnails.append({
                'url': uncropped,
                'id': thumbnail_id,
                'preference': preference
            })

    title = description = channel_url = channel_name = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        tags = renderer.get('keywords', '').split()

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    # The channel id doubles as playlist id when metadata was found; otherwise use the item id
    playlist_id = channel_id if renderer and channel_id is not None else item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # Channel fields mirror the (possibly just updated) uploader fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4321

4322

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline (watch page) playlist, paging via the ``next`` endpoint.

    Each page is the ``playlist`` panel of a response. Videos up to and
    including the last one already yielded are skipped. Extraction ends when
    a page has no playlist panel, no videos, or nothing new.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        if not playlist:
            # The response carried no playlist panel; nothing more to page through
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last yielded video; start at 0 when it is not on this page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last panel item may lack a watch endpoint; fall back to the defaults below
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

4352

4353

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist renderer.

    When the renderer links to a playlist page different from `url` (and the
    playlist id is not one of the known-unviewable kinds), a url_result
    pointing at YoutubeTabIE is returned. Otherwise the entries are extracted
    inline and returned as a playlist_result.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    linked_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, linked_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not delegate:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(
            entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4375

4376

def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its sidebar.

    Labels come from the badges of the primary sidebar info renderer, plus the
    selected entry of the privacy dropdown when one is present.
    Note: nothing is assumed to be public unless a 'public' label is found.
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_items = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_items:
        selected = try_get(
            entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool)
        if selected:
            selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
            if selected_label:
                labels.add(selected_label.lower())
                break

    is_private = None
    is_unlisted = None
    for name in labels:
        if name == 'public':
            is_unlisted = is_private = False
        elif name == 'private':
            is_private = True
        elif name == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)

4407

4408

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` entry among the playlist
    sidebar items of `data`, or None when there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    # Lazily look up the requested renderer in each item; stop at the first truthy hit
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Re-request the playlist so that unavailable videos are included.

    The browse id and params are taken from the 'show unavailable videos'
    menu item of the primary sidebar when it is present; otherwise default
    values are used. Returns None when there is no primary sidebar renderer.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_item = entry.get('menuNavigationItemRenderer')
        label = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    # Fall back to the default values when the menu item did not provide them
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4452

4453

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the `skip` extractor argument of YoutubeTabIE"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps

4456

4457

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download `url` and extract its yt initial data, retrying when needed.

    Up to `extractor_retries` (default 3) retries follow the first attempt.
    A retry happens after a network error other than HTTP 403/429, or when
    the extracted data has none of the expected top-level keys.

    @param fatal  if true, errors are raised; otherwise they are reported as
                  warnings and whatever was obtained so far is returned
    @returns      (webpage, data) - either may be None if nothing was obtained
    """
    retries = self.get_param('extractor_retries', 3)
    # Starts at -1 so that the first attempt is number 0 and is not counted as a retry
    count = -1
    webpage = data = last_error = None
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # HTTP 403 and 429 are not retried; any other network error is
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            # Not retryable, or out of retries
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break
        else:
            try:
                self._extract_and_report_alerts(data)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                break

            # The data is accepted once one of these keys yields a value
            if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
                break

            last_error = 'Incomplete yt initial data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                break

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage.

    Does nothing when `ytcfg` is available or the session is not authenticated.
    Otherwise raises ExtractorError when `fatal` is set and the 'authcheck'
    skip argument was not given, and reports a one-time warning in all other cases.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_skipped = 'authcheck' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if fatal and not authcheck_skipped:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

4514

4515

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the initial data (and ytcfg) for `url`.

    Unless the webpage is skipped, the data is first taken from the webpage.
    If that yields no data, the tab endpoint API is used instead.

    @returns  (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # A selected home-page tab means the URL was redirected there,
        # unless the recommended feed was what was asked for
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if not fatal:
                self.report_warning(msg, only_once=True)
            else:
                raise ExtractorError(msg, expected=True)

    if data:
        return data, ytcfg

    # No usable webpage data: fall back to the API
    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4534

4535

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the `navigation/resolve_url` API and fetch the
    response of the endpoint it points to (`browse` or `next`).

    Raises ExtractorError if the URL resolves to neither endpoint and `fatal`
    is set; otherwise a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolve response, API endpoint to query), tried in this order
    candidates = (('browseEndpoint', 'browse'), ('watchEndpoint', 'next'))
    for ep_key, ep in candidates:
        params = try_get(resolved, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)

4552

4553

# Default `params` value for _search_results, used when the caller passes none.
# A falsy value means no `params` key is added to the search query.
_SEARCH_PARAMS = None

4554

4555

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search for `query`, following continuations.

    @param params          search `params` value; defaults to `_SEARCH_PARAMS`
    @param default_client  client used for the ytcfg and the API requests
    """
    payload = {'query': query}
    search_params = self._SEARCH_PARAMS if params is NO_DEFAULT else params
    if search_params:
        payload['params'] = search_params

    # Paths at which the result list may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({keys[0] for keys in content_keys})
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-element list: _extract_entries receives it so that it can hand back
    # the continuation for the next page
    continuation_list = [None]
    response = None
    page_num = 0
    while True:
        page_num += 1
        payload.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response), default_client=default_client)
        response = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=payload,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        page_contents = traverse_obj(response, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4590

IE_DESC = 'YouTube Tabs'

4591

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4600

(?P<not_channel>

4601

feed/|hashtag/|

4602

(?:playlist|watch)\?.*?\blist=

4603

)|

4604

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4609

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4610

}

4611

IE_NAME = 'youtube:tab'

4612

4613

_TESTS = [{

4614

'note': 'playlists, multipage',

4615

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4616

'playlist_mincount': 94,

4617

'info_dict': {

4618

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4619

'title': 'Igor Kleiner - Playlists',

4620

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4621

'uploader': 'Igor Kleiner',

4622

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4623

'channel': 'Igor Kleiner',

4624

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4625

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4626

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4627

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4628

'channel_follower_count': int

4629

},

4630

}, {

4631

'note': 'playlists, multipage, different order',

4632

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4633

'playlist_mincount': 94,

4634

'info_dict': {

4635

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4636

'title': 'Igor Kleiner - Playlists',

4637

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4638

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4639

'uploader': 'Igor Kleiner',

4640

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4641

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4642

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4643

'channel': 'Igor Kleiner',

4644

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4645

'channel_follower_count': int

4646

},

4647

}, {

4648

'note': 'playlists, series',

4649

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4650

'playlist_mincount': 5,

4651

'info_dict': {

4652

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4653

'title': '3Blue1Brown - Playlists',

4654

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4655

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4656

'uploader': '3Blue1Brown',

4657

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4658

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4659

'channel': '3Blue1Brown',

4660

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4661

'tags': ['Mathematics'],

4662

'channel_follower_count': int

4663

},

4664

}, {

4665

'note': 'playlists, singlepage',

4666

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4667

'playlist_mincount': 4,

4668

'info_dict': {

4669

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4670

'title': 'ThirstForScience - Playlists',

4671

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4672

'uploader': 'ThirstForScience',

4673

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4674

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4675

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4676

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4677

'tags': 'count:13',

4678

'channel': 'ThirstForScience',

4679

'channel_follower_count': int

4680

}

4681

}, {

4682

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4683

'only_matching': True,

4684

}, {

4685

'note': 'basic, single video playlist',

4686

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4687

'info_dict': {

4688

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4689

'uploader': 'Sergey M.',

4690

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4691

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4696

'channel': 'Sergey M.',

4697

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4698

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4699

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4704

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4705

'info_dict': {

4706

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4707

'uploader': 'Sergey M.',

4708

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4709

'title': 'youtube-dl empty playlist',

4710

'tags': [],

4711

'channel': 'Sergey M.',

4712

'description': '',

4713

'modified_date': '20160902',

4714

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4715

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4716

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4722

'info_dict': {

4723

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4724

'title': 'lex will - Home',

4725

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4726

'uploader': 'lex will',

4727

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4728

'channel': 'lex will',

4729

'tags': ['bible', 'history', 'prophesy'],

4730

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4731

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4732

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4733

'channel_follower_count': int

4734

},

4735

'playlist_mincount': 2,

4736

}, {

4737

'note': 'Videos tab',

4738

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4739

'info_dict': {

4740

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4741

'title': 'lex will - Videos',

4742

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4743

'uploader': 'lex will',

4744

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4745

'tags': ['bible', 'history', 'prophesy'],

4746

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4747

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4748

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4749

'channel': 'lex will',

4750

'channel_follower_count': int

4751

},

4752

'playlist_mincount': 975,

4753

}, {

4754

'note': 'Videos tab, sorted by popular',

4755

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4756

'info_dict': {

4757

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4758

'title': 'lex will - Videos',

4759

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4760

'uploader': 'lex will',

4761

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4762

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4763

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4764

'channel': 'lex will',

4765

'tags': ['bible', 'history', 'prophesy'],

4766

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4767

'channel_follower_count': int

4768

},

4769

'playlist_mincount': 199,

4770

}, {

4771

'note': 'Playlists tab',

4772

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4773

'info_dict': {

4774

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4775

'title': 'lex will - Playlists',

4776

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4777

'uploader': 'lex will',

4778

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4779

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4780

'channel': 'lex will',

4781

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4782

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4783

'tags': ['bible', 'history', 'prophesy'],

4784

'channel_follower_count': int

4785

},

4786

'playlist_mincount': 17,

4787

}, {

4788

'note': 'Community tab',

4789

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4790

'info_dict': {

4791

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4792

'title': 'lex will - Community',

4793

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4794

'uploader': 'lex will',

4795

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4796

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4797

'channel': 'lex will',

4798

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4799

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4800

'tags': ['bible', 'history', 'prophesy'],

4801

'channel_follower_count': int

4802

},

4803

'playlist_mincount': 18,

4804

}, {

4805

'note': 'Channels tab',

4806

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4807

'info_dict': {

4808

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4809

'title': 'lex will - Channels',

4810

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4811

'uploader': 'lex will',

4812

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4813

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4814

'channel': 'lex will',

4815

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4816

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4817

'tags': ['bible', 'history', 'prophesy'],

4818

'channel_follower_count': int

4819

},

4820

'playlist_mincount': 12,

4821

}, {

4822

'note': 'Search tab',

4823

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4824

'playlist_mincount': 40,

4825

'info_dict': {

4826

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4827

'title': '3Blue1Brown - Search - linear algebra',

4828

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4829

'uploader': '3Blue1Brown',

4830

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4831

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4832

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4833

'tags': ['Mathematics'],

4834

'channel': '3Blue1Brown',

4835

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4836

'channel_follower_count': int

4837

},

4838

}, {

4839

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4840

'only_matching': True,

4841

}, {

4842

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4843

'only_matching': True,

4844

}, {

4845

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4846

'only_matching': True,

4847

}, {

4848

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4849

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4850

'info_dict': {

4851

'title': '29C3: Not my department',

4852

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4853

'uploader': 'Christiaan008',

4854

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4855

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4856

'tags': [],

4857

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4858

'view_count': int,

4859

'modified_date': '20150605',

4860

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4861

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4862

'channel': 'Christiaan008',

4863

},

4864

'playlist_count': 96,

4865

}, {

4866

'note': 'Large playlist',

4867

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4868

'info_dict': {

4869

'title': 'Uploads from Cauchemar',

4870

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4871

'uploader': 'Cauchemar',

4872

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4873

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4874

'tags': [],

4875

'modified_date': r're:\d{8}',

4876

'channel': 'Cauchemar',

4877

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4878

'view_count': int,

4879

'description': '',

4880

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4881

},

4882

'playlist_mincount': 1123,

4883

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4884

}, {

4885

'note': 'even larger playlist, 8832 videos',

4886

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4887

'only_matching': True,

4888

}, {

4889

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4890

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4891

'info_dict': {

4892

'title': 'Uploads from Interstellar Movie',

4893

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4894

'uploader': 'Interstellar Movie',

4895

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4896

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4897

'tags': [],

4898

'view_count': int,

4899

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4900

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4901

'channel': 'Interstellar Movie',

4902

'description': '',

4903

'modified_date': r're:\d{8}',

4904

},

4905

'playlist_mincount': 21,

4906

}, {

4907

'note': 'Playlist with "show unavailable videos" button',

4908

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4909

'info_dict': {

4910

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4911

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4912

'uploader': 'Phim Siêu Nhân Nhật Bản',

4913

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4914

'view_count': int,

4915

'channel': 'Phim Siêu Nhân Nhật Bản',

4916

'tags': [],

4917

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4918

'description': '',

4919

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4920

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4921

'modified_date': r're:\d{8}',

4922

},

4923

'playlist_mincount': 200,

4924

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4925

}, {

4926

'note': 'Playlist with unavailable videos in page 7',

4927

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4928

'info_dict': {

4929

'title': 'Uploads from BlankTV',

4930

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4931

'uploader': 'BlankTV',

4932

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4933

'channel': 'BlankTV',

4934

'channel_url': 'https://www.youtube.com/c/blanktv',

4935

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4936

'view_count': int,

4937

'tags': [],

4938

'uploader_url': 'https://www.youtube.com/c/blanktv',

4939

'modified_date': r're:\d{8}',

4940

'description': '',

4941

},

4942

'playlist_mincount': 1000,

4943

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4944

}, {

4945

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4946

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4947

'info_dict': {

4948

'title': 'Data Analysis with Dr Mike Pound',

4949

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4950

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4951

'uploader': 'Computerphile',

4952

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4953

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4954

'tags': [],

4955

'view_count': int,

4956

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4957

'channel_url': 'https://www.youtube.com/user/Computerphile',

4958

'channel': 'Computerphile',

4959

},

4960

'playlist_mincount': 11,

4961

}, {

4962

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4963

'only_matching': True,

4964

}, {

4965

'note': 'Playlist URL that does not actually serve a playlist',

4966

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4971

'uploader': 'STREEM',

4972

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4973

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4974

'upload_date': '20150526',

4975

'license': 'Standard YouTube License',

4976

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4977

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4984

},

4985

'skip': 'This video is not available.',

4986

'add_ie': [YoutubeIE.ie_key()],

4987

}, {

4988

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4989

'only_matching': True,

4990

}, {

4991

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

4992

'only_matching': True,

4993

}, {

4994

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

4995

'info_dict': {

4996

'id': 'GgL890LIznQ', # This will keep changing

4997

'ext': 'mp4',

4998

'title': str,

4999

'uploader': 'Sky News',

5000

'uploader_id': 'skynews',

5001

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5002

'upload_date': r're:\d{8}',

5003

'description': str,

5004

'categories': ['News & Politics'],

5005

'tags': list,

5006

'like_count': int,

5007

'release_timestamp': 1642502819,

5008

'channel': 'Sky News',

5009

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5010

'age_limit': 0,

5011

'view_count': int,

5012

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

5013

'playable_in_embed': True,

5014

'release_date': '20220118',

5015

'availability': 'public',

5016

'live_status': 'is_live',

5017

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5018

'channel_follower_count': int

5019

},

5020

'params': {

5021

'skip_download': True,

5022

},

5023

'expected_warnings': ['Ignoring subtitle tracks found in '],

5024

}, {

5025

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5030

'uploader': 'The Young Turks',

5031

'uploader_id': 'TheYoungTurks',

5032

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5033

'upload_date': '20150715',

5034

'license': 'Standard YouTube License',

5035

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5036

'categories': ['News & Politics'],

5037

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5042

},

5043

'only_matching': True,

5044

}, {

5045

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5046

'only_matching': True,

5047

}, {

5048

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5049

'only_matching': True,

5050

}, {

5051

'note': 'A channel that is not live. Should raise error',

5052

'url': 'https://www.youtube.com/user/numberphile/live',

5053

'only_matching': True,

5054

}, {

5055

'url': 'https://www.youtube.com/feed/trending',

5056

'only_matching': True,

5057

}, {

5058

'url': 'https://www.youtube.com/feed/library',

5059

'only_matching': True,

5060

}, {

5061

'url': 'https://www.youtube.com/feed/history',

5062

'only_matching': True,

5063

}, {

5064

'url': 'https://www.youtube.com/feed/subscriptions',

5065

'only_matching': True,

5066

}, {

5067

'url': 'https://www.youtube.com/feed/watch_later',

5068

'only_matching': True,

5069

}, {

5070

'note': 'Recommended - redirects to home page.',

5071

'url': 'https://www.youtube.com/feed/recommended',

5072

'only_matching': True,

5073

}, {

5074

'note': 'inline playlist with not always working continuations',

5075

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5076

'only_matching': True,

5077

}, {

5078

'url': 'https://www.youtube.com/course',

5079

'only_matching': True,

5080

}, {

5081

'url': 'https://www.youtube.com/zsecurity',

5082

'only_matching': True,

5083

}, {

5084

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5085

'only_matching': True,

5086

}, {

5087

'url': 'https://www.youtube.com/TheYoungTurks/live',

5088

'only_matching': True,

5089

}, {

5090

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5097

}, {

5098

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5099

'only_matching': True,

5100

}, {

5101

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5102

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5103

'only_matching': True

5104

}, {

5105

'note': '/browse/ should redirect to /channel/',

5106

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5107

'only_matching': True

5108

}, {

5109

'note': 'VLPL, should redirect to playlist?list=PL...',

5110

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5111

'info_dict': {

5112

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5113

'uploader': 'NoCopyrightSounds',

5114

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5115

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5116

'title': 'NCS Releases',

5117

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5118

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5119

'modified_date': r're:\d{8}',

5120

'view_count': int,

5121

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5122

'tags': [],

5123

'channel': 'NoCopyrightSounds',

5124

},

5125

'playlist_mincount': 166,

5126

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5127

}, {

5128

'note': 'Topic, should redirect to playlist?list=UU...',

5129

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5130

'info_dict': {

5131

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5132

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5133

'title': 'Uploads from Royalty Free Music - Topic',

5134

'uploader': 'Royalty Free Music - Topic',

5135

'tags': [],

5136

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5137

'channel': 'Royalty Free Music - Topic',

5138

'view_count': int,

5139

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5140

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5141

'modified_date': r're:\d{8}',

5142

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5143

'description': '',

5144

},

5145

'expected_warnings': [

5146

'The URL does not have a videos tab',

5147

r'[Uu]navailable videos (are|will be) hidden',

5148

],

5149

'playlist_mincount': 101,

5150

}, {

5151

'note': 'Topic without a UU playlist',

5152

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5153

'info_dict': {

5154

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5155

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5156

'tags': [],

5157

},

5158

'expected_warnings': [

5159

'the playlist redirect gave error',

5160

],

5161

'playlist_mincount': 9,

5162

}, {

5163

'note': 'Youtube music Album',

5164

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5165

'info_dict': {

5166

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5167

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5172

'modified_date': r're:\d{8}',

5173

},

5174

'playlist_count': 50,

5175

}, {

5176

'note': 'unlisted single video playlist',

5177

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5178

'info_dict': {

5179

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5180

'uploader': 'colethedj',

5181

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5182

'title': 'yt-dlp unlisted playlist test',

5183

'availability': 'unlisted',

5184

'tags': [],

5185

'modified_date': '20211208',

5186

'channel': 'colethedj',

5187

'view_count': int,

5188

'description': '',

5189

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5190

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5191

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5196

'url': 'https://www.youtube.com/feed/recommended',

5197

'info_dict': {

5198

'id': 'recommended',

5199

'title': 'recommended',

5200

'tags': [],

5201

},

5202

'playlist_mincount': 50,

5203

'params': {

5204

'skip_download': True,

5205

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5206

},

5207

}, {

5208

'note': 'API Fallback: /videos tab, sorted by oldest first',

5209

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5210

'info_dict': {

5211

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5212

'title': 'Cody\'sLab - Videos',

5213

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5214

'uploader': 'Cody\'sLab',

5215

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5216

'channel': 'Cody\'sLab',

5217

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5218

'tags': [],

5219

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5220

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5221

'channel_follower_count': int

5222

},

5223

'playlist_mincount': 650,

5224

'params': {

5225

'skip_download': True,

5226

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5227

},

5228

}, {

5229

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5230

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5231

'info_dict': {

5232

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5233

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5234

'title': 'Uploads from Royalty Free Music - Topic',

5235

'uploader': 'Royalty Free Music - Topic',

5236

'modified_date': r're:\d{8}',

5237

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5238

'description': '',

5239

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5240

'tags': [],

5241

'channel': 'Royalty Free Music - Topic',

5242

'view_count': int,

5243

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5244

},

5245

'expected_warnings': [

5246

'does not have a videos tab',

5247

r'[Uu]navailable videos (are|will be) hidden',

5248

],

5249

'playlist_mincount': 101,

5250

'params': {

5251

'skip_download': True,

5252

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5253

},

5254

}, {

5255

'note': 'non-standard redirect to regional channel',

5256

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5257

'only_matching': True

5258

}, {

5259

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5260

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5261

'info_dict': {

5262

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5263

'modified_date': '20220407',

5264

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5265

'tags': [],

5266

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5267

'uploader': 'pukkandan',

5268

'availability': 'unlisted',

5269

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5270

'channel': 'pukkandan',

5271

'description': 'Test for collaborative playlist',

5272

'title': 'yt-dlp test - collaborative playlist',

5273

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5274

},

5275

'playlist_mincount': 2

}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle `url`.

    URLs accepted by YoutubeIE are always declined here; every other URL
    is decided by the inherited `suitable` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5281

5282

# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional "/<word>" segment; the conditional group
#          (?(not_channel)|...) only attempts it when the `not_channel`
#          group did not participate in the match
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5283

5284

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a tab, playlist or channel page, following redirects where needed.

    The URL host is forced to www.youtube.com and the URL is split with
    `_URL_RE` into `pre`/`tab`/`post`. Depending on the URL and the
    downloaded data this returns the result of `_extract_from_tabs`, the
    result of `_extract_from_playlist`, or a `url_result` pointing at
    another URL (a single video, a resolved album playlist, or a regional
    redirect).

    `smuggled_data` is consulted only for the 'is_music_url' flag.

    Raises ExtractorError when the page cannot be recognized, when an album
    cannot be resolved to a playlist, when a /live tab did not redirect to a
    video, or when an explicitly requested tab is missing.
    """
    item_id = self._match_id(url)
    # Normalize the host so that every later request targets www.youtube.com
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match against _URL_RE and replace unmatched (None) groups with ''
        # so the parts can be lower-cased, sliced and joined without None checks
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=compat_str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                # Hand the canonical URL back to this extractor
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user actually asked for before a default tab is substituted
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and trailing part when following the redirect
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        # The tab titled "home" is treated as the /featured tab
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]  # drop the leading '/'
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if not original_tab_name:
                    # The tab was substituted above rather than requested by the user,
                    # so fall back instead of failing
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    # The user explicitly asked for a tab that does not exist
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Neither tabs nor a playlist: fall back to a single video if one is known
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')

5413

5414

5415

class YoutubePlaylistIE(InfoExtractor):
    """Match playlist URLs and bare playlist IDs and delegate them to YoutubeTabIE."""

    IE_DESC = 'YouTube playlists'
    # Verbose-mode pattern: an optional youtube/invidious URL prefix ending in
    # "list=", followed by the playlist ID itself (a bare ID is also accepted)
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle `url`.

        Declines URLs that YoutubeTabIE accepts and URLs whose query string
        carries a non-empty `v` parameter; otherwise defers to the inherited
        `suitable` check.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs is the module-level import; no function-scope re-import needed
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a /playlist URL and delegate to YoutubeTabIE.

        The original query string is kept when there is one; otherwise a
        `list=<id>` query is built from the matched playlist ID. For URLs
        that `is_music_url` recognizes, the flag is smuggled along.
        """
        playlist_id = self._match_id(url)
        # Must be evaluated on the original URL, before it is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5524

5525

5526

class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a `list=` playlist parameter."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5573

5574

5575

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map /embed/live_stream?channel=<id> URLs onto the channel's /live page."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the channel's /live URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5588

5589

5590

class YoutubeYtUserIE(InfoExtractor):
    """Resolve the "ytuser:<name>" shorthand to the user's /videos page."""

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's /videos URL."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5604

5605

5606

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ":ytfav" keyword family to the `LL` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `LL` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

5623

5624

5625

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Extract the entries of the account's notification menu (":ytnotif" keyword)."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per notification found in `response`.

        `continuation_list` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last
        `continuationItemRenderer` seen, if any.
        """
        notification_list = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for item in notification_list:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            continuation = item.get('continuationItemRenderer')
            if continuation:
                continuation_list[0] = continuation

    def _extract_notification_renderer(self, notification):
        """Build a `url`-type entry from one notification renderer.

        Returns None when the notification points neither at a video nor at
        a community post (no video ID and no channel ID + post ID pair).
        Missing channel name or message text no longer raises; the title is
        then extracted on a best-effort basis or left as None.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        url = f'https://www.youtube.com/watch?v={video_id}'
        channel_id = None
        if not video_id:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            canonical_base_url = traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str)
            # Only search when there is text to search in
            post_id = self._search_regex(
                r'/post/(.+)', canonical_base_url,
                'post id', default=None) if canonical_base_url else None
            if not channel_id or not post_id:
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        short_message = self._get_text(notification, 'shortMessage')
        # re.escape(None) raises TypeError, so only anchor on the channel
        # name when one was actually found
        title_pattern = rf'{re.escape(channel)} [^:]+: (.+)' if channel else r'[^:]+: (.+)'
        title = self._search_regex(
            title_pattern, short_message,
            'video title', default=None) if short_message else None
        if title:
            title = title.replace('\xad', '')  # remove soft hyphens
        upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')
                       if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())
                       else None)
        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries page by page until no continuation is returned."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            # Visitor data is taken from the previous page's response (None on the first page)
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return a playlist named 'notifications' backed by the lazy entry generator."""
        display_id = 'notifications'
        ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)

5712

5713

5714

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Purely declarative: search behaviour comes from the base classes.
    # _SEARCH_KEY is the keyword prefix (see the 'ytsearch5:...' test URL).
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Purely declarative variant of YoutubeSearchIE with its own keyword
    # ('ytsearchdate') and a different _SEARCH_PARAMS token.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle /results and /search URLs, forwarding the `sp` parameter to the search."""

    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            'entries': [{
                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                'title': '#cats',
            }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results for the URL's query as a playlist.

        The query comes from `search_query`, falling back to `q`; it is used
        as both playlist ID and title.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

5782

5783

5784

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs, optionally limited to one section."""

    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment) -> `sp` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the music search results for the URL as a playlist.

        An explicit `sp` parameter wins; its section name is looked up in
        `_SECTIONS`, falling back to the raw `sp` value. Without `sp`, the
        URL fragment is decoded, lower-cased and used as the section name,
        and it is dropped if `_SECTIONS` does not know it. The title is
        "<query> - <section>", or just the query when there is no section.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if not params:
            # Text between the first and second '#', or '' when there is no fragment
            fragment = (url.split('#') + [''])[1]
            section = compat_urllib_parse_unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        else:
            section = next(
                (name for name, token in self._SECTIONS.items() if token == params), params)
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

5835

5836

5837

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the /feed/<name> keyword extractors.
    Each subclass is expected to override _FEED_NAME; it determines both
    IE_NAME and the feed URL that is delegated to YoutubeTabIE.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        # Reuse the base YouTube extractor's login check on this instance
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        return f'youtube:{cls._FEED_NAME}'

    def _real_extract(self, url):
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5855

5856

5857

class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ":ytwatchlater" keyword to the `WL` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `WL` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5869

5870

5871

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home URL as well as the ":ytrec" keyword.
    # Overrides the base class's _LOGIN_REQUIRED = True with False.
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Keyword extractor for the 'subscriptions' feed; all behaviour is inherited.
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "history" feed
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    # Covers ":ythis" and ":ythistory"
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{'url': ':ythistory', 'only_matching': True}]

class YoutubeStoriesIE(InfoExtractor):
    # Maps a "ytstories:UC..." pseudo-URL onto a playlist URL
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    # The "id" group captures the channel ID *without* its leading "UC"
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', 'only_matching': True}]

    def _real_extract(self, url):
        """Build the "RLTD"-prefixed playlist ID and delegate to YoutubeTabIE."""
        channel_suffix = self._match_id(url)
        stories_list_id = f'RLTD{channel_suffix}'
        stories_url = f'https://www.youtube.com/playlist?list={stories_list_id}&playnext=1'
        return self.url_result(stories_url, ie=YoutubeTabIE, video_id=stories_list_id)

5925

5926

5927

class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catches YouTube watch/attribution_link URLs that end without a video ID

    _VALID_URL only matches when the URL stops right after a single,
    video-less query parameter (or after a bare "watch?"), which is what
    is left when an unquoted "&" makes the shell cut the URL short.
    Extraction therefore always fails with a hint on how to quote the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose-mode pattern: the whitespace/newlines below are not part of the match
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """
        Always raise an ExtractorError explaining that the URL must be quoted

        The example commands name the yt-dlp executable (the message used to
        point at youtube-dl, which is a different program).
        """
        # NOTE(review): expected=True presumably flags this as a user error
        # rather than an extractor bug - confirm against ExtractorError
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(InfoExtractor):
    # Matches youtube.com/clip/ URLs, warns, and passes the URL on unchanged
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit a warning, then return a url_result for the same URL with 'Generic'."""
        notice = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(notice)
        return self.url_result(url, 'Generic')

5984

5985

5986

class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose "v" value is only 1-10 characters long and
    # rejects them with an explanatory error
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True}]

    def _real_extract(self, url):
        """Always raise an ExtractorError naming the incomplete ID and the URL."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)