jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import hashlib
	6	import itertools
	7	import json
	8	import math
	9	import os.path
	10	import random
	11	import re
	12	import sys
	13	import threading
	14	import time
	15	import traceback
	16
	17	from .common import InfoExtractor, SearchInfoExtractor
	18	from ..compat import functools
	19	from ..compat import (
	20	compat_chr,
	21	compat_HTTPError,
	22	compat_parse_qs,
	23	compat_str,
	24	compat_urllib_parse_unquote_plus,
	25	compat_urllib_parse_urlencode,
	26	compat_urllib_parse_urlparse,
	27	compat_urlparse,
	28	)
	29	from ..jsinterp import JSInterpreter
	30	from ..utils import (
	31	NO_DEFAULT,
	32	ExtractorError,
	33	bug_reports_message,
	34	classproperty,
	35	clean_html,
	36	datetime_from_str,
	37	dict_get,
	38	error_to_compat_str,
	39	float_or_none,
	40	format_field,
	41	get_first,
	42	int_or_none,
	43	is_html,
	44	join_nonempty,
	45	js_to_json,
	46	mimetype2ext,
	47	network_exceptions,
	48	orderedSet,
	49	parse_codecs,
	50	parse_count,
	51	parse_duration,
	52	parse_iso8601,
	53	parse_qs,
	54	qualities,
	55	remove_end,
	56	remove_start,
	57	smuggle_url,
	58	str_or_none,
	59	str_to_int,
	60	strftime_or_none,
	61	traverse_obj,
	62	try_get,
	63	unescapeHTML,
	64	unified_strdate,
	65	unified_timestamp,
	66	unsmuggle_url,
	67	update_url_query,
	68	url_or_none,
	69	urljoin,
	70	variadic,
	71	)
	72
	73	# any clients starting with _ cannot be explicity requested by the user
	74	INNERTUBE_CLIENTS = {
	75	'web': {
	76	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	77	'INNERTUBE_CONTEXT': {
	78	'client': {
	79	'clientName': 'WEB',
	80	'clientVersion': '2.20211221.00.00',
	81	}
	82	},
	83	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	84	},
	85	'web_embedded': {
	86	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	87	'INNERTUBE_CONTEXT': {
	88	'client': {
	89	'clientName': 'WEB_EMBEDDED_PLAYER',
	90	'clientVersion': '1.20211215.00.01',
	91	},
	92	},
	93	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	94	},
	95	'web_music': {
	96	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	97	'INNERTUBE_HOST': 'music.youtube.com',
	98	'INNERTUBE_CONTEXT': {
	99	'client': {
	100	'clientName': 'WEB_REMIX',
	101	'clientVersion': '1.20211213.00.00',
	102	}
	103	},
	104	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	105	},
	106	'web_creator': {
	107	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	108	'INNERTUBE_CONTEXT': {
	109	'client': {
	110	'clientName': 'WEB_CREATOR',
	111	'clientVersion': '1.20211220.02.00',
	112	}
	113	},
	114	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	115	},
	116	'android': {
	117	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	118	'INNERTUBE_CONTEXT': {
	119	'client': {
	120	'clientName': 'ANDROID',
	121	'clientVersion': '16.49',
	122	}
	123	},
	124	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	125	'REQUIRE_JS_PLAYER': False
	126	},
	127	'android_embedded': {
	128	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	129	'INNERTUBE_CONTEXT': {
	130	'client': {
	131	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	132	'clientVersion': '16.49',
	133	},
	134	},
	135	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	136	'REQUIRE_JS_PLAYER': False
	137	},
	138	'android_music': {
	139	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	140	'INNERTUBE_CONTEXT': {
	141	'client': {
	142	'clientName': 'ANDROID_MUSIC',
	143	'clientVersion': '4.57',
	144	}
	145	},
	146	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	147	'REQUIRE_JS_PLAYER': False
	148	},
	149	'android_creator': {
	150	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	151	'INNERTUBE_CONTEXT': {
	152	'client': {
	153	'clientName': 'ANDROID_CREATOR',
	154	'clientVersion': '21.47',
	155	},
	156	},
	157	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	158	'REQUIRE_JS_PLAYER': False
	159	},
	160	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	161	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	162	'ios': {
	163	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	164	'INNERTUBE_CONTEXT': {
	165	'client': {
	166	'clientName': 'IOS',
	167	'clientVersion': '16.46',
	168	'deviceModel': 'iPhone14,3',
	169	}
	170	},
	171	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	172	'REQUIRE_JS_PLAYER': False
	173	},
	174	'ios_embedded': {
	175	'INNERTUBE_CONTEXT': {
	176	'client': {
	177	'clientName': 'IOS_MESSAGES_EXTENSION',
	178	'clientVersion': '16.46',
	179	'deviceModel': 'iPhone14,3',
	180	},
	181	},
	182	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	183	'REQUIRE_JS_PLAYER': False
	184	},
	185	'ios_music': {
	186	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	187	'INNERTUBE_CONTEXT': {
	188	'client': {
	189	'clientName': 'IOS_MUSIC',
	190	'clientVersion': '4.57',
	191	},
	192	},
	193	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	194	'REQUIRE_JS_PLAYER': False
	195	},
	196	'ios_creator': {
	197	'INNERTUBE_CONTEXT': {
	198	'client': {
	199	'clientName': 'IOS_CREATOR',
	200	'clientVersion': '21.47',
	201	},
	202	},
	203	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	204	'REQUIRE_JS_PLAYER': False
	205	},
	206	# mweb has 'ultralow' formats
	207	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	208	'mweb': {
	209	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	210	'INNERTUBE_CONTEXT': {
	211	'client': {
	212	'clientName': 'MWEB',
	213	'clientVersion': '2.20211221.01.00',
	214	}
	215	},
	216	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	217	},
	218	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	219	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	220	'tv_embedded': {
	221	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	222	'INNERTUBE_CONTEXT': {
	223	'client': {
	224	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	225	'clientVersion': '2.0',
	226	},
	227	},
	228	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	229	},
	230	}
	231
	232
	233	def _split_innertube_client(client_name):
	234	variant, *base = client_name.rsplit('.', 1)
	235	if base:
	236	return variant, base[0], variant
	237	base, *variant = client_name.split('_', 1)
	238	return client_name, base, variant[0] if variant else None
	239
	240
	241	def build_innertube_clients():
	242	THIRD_PARTY = {
	243	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	244	}
	245	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	246	priority = qualities(BASE_CLIENTS[::-1])
	247
	248	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	249	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	250	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	251	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	252	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	253
	254	_, base_client, variant = _split_innertube_client(client)
	255	ytcfg['priority'] = 10 * priority(base_client)
	256
	257	if not variant:
	258	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	259	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	260	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	261	embedscreen['priority'] -= 3
	262	elif variant == 'embedded':
	263	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	264	ytcfg['priority'] -= 2
	265	else:
	266	ytcfg['priority'] -= 3
	267
	268
	269	build_innertube_clients()
	270
	271
	272	class YoutubeBaseInfoExtractor(InfoExtractor):
	273	"""Provide base functions for Youtube extractors"""
	274
	275	_RESERVED_NAMES = (
	276	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	277	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	278	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	279	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	280
	281	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	282
	283	# _NETRC_MACHINE = 'youtube'
	284
	285	# If True it will raise an error if no login info is provided
	286	_LOGIN_REQUIRED = False
	287
	288	_INVIDIOUS_SITES = (
	289	# invidious-redirect websites
	290	r'(?:www\.)?redirect\.invidious\.io',
	291	r'(?:(?:www\|dev)\.)?invidio\.us',
	292	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	293	r'(?:www\.)?invidious\.pussthecat\.org',
	294	r'(?:www\.)?invidious\.zee\.li',
	295	r'(?:www\.)?invidious\.ethibox\.fr',
	296	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	297	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	298	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	299	# youtube-dl invidious instances list
	300	r'(?:(?:www\|no)\.)?invidiou\.sh',
	301	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	302	r'(?:www\.)?invidious\.kabi\.tk',
	303	r'(?:www\.)?invidious\.mastodon\.host',
	304	r'(?:www\.)?invidious\.zapashcanon\.fr',
	305	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	306	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	307	r'(?:www\.)?invidious\.himiko\.cloud',
	308	r'(?:www\.)?invidious\.reallyancient\.tech',
	309	r'(?:www\.)?invidious\.tube',
	310	r'(?:www\.)?invidiou\.site',
	311	r'(?:www\.)?invidious\.site',
	312	r'(?:www\.)?invidious\.xyz',
	313	r'(?:www\.)?invidious\.nixnet\.xyz',
	314	r'(?:www\.)?invidious\.048596\.xyz',
	315	r'(?:www\.)?invidious\.drycat\.fr',
	316	r'(?:www\.)?inv\.skyn3t\.in',
	317	r'(?:www\.)?tube\.poal\.co',
	318	r'(?:www\.)?tube\.connect\.cafe',
	319	r'(?:www\.)?vid\.wxzm\.sx',
	320	r'(?:www\.)?vid\.mint\.lgbt',
	321	r'(?:www\.)?vid\.puffyan\.us',
	322	r'(?:www\.)?yewtu\.be',
	323	r'(?:www\.)?yt\.elukerio\.org',
	324	r'(?:www\.)?yt\.lelux\.fi',
	325	r'(?:www\.)?invidious\.ggc-project\.de',
	326	r'(?:www\.)?yt\.maisputain\.ovh',
	327	r'(?:www\.)?ytprivate\.com',
	328	r'(?:www\.)?invidious\.13ad\.de',
	329	r'(?:www\.)?invidious\.toot\.koeln',
	330	r'(?:www\.)?invidious\.fdn\.fr',
	331	r'(?:www\.)?watch\.nettohikari\.com',
	332	r'(?:www\.)?invidious\.namazso\.eu',
	333	r'(?:www\.)?invidious\.silkky\.cloud',
	334	r'(?:www\.)?invidious\.exonip\.de',
	335	r'(?:www\.)?invidious\.riverside\.rocks',
	336	r'(?:www\.)?invidious\.blamefran\.net',
	337	r'(?:www\.)?invidious\.moomoo\.de',
	338	r'(?:www\.)?ytb\.trom\.tf',
	339	r'(?:www\.)?yt\.cyberhost\.uk',
	340	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	341	r'(?:www\.)?qklhadlycap4cnod\.onion',
	342	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	343	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	344	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	345	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	346	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	347	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	348	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	349	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	350	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	351	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	352	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	353	r'(?:www\.)?piped\.kavin\.rocks',
	354	r'(?:www\.)?piped\.silkky\.cloud',
	355	r'(?:www\.)?piped\.tokhmi\.xyz',
	356	r'(?:www\.)?piped\.moomoo\.me',
	357	r'(?:www\.)?il\.ax',
	358	r'(?:www\.)?piped\.syncpundit\.com',
	359	r'(?:www\.)?piped\.mha\.fi',
	360	r'(?:www\.)?piped\.mint\.lgbt',
	361	r'(?:www\.)?piped\.privacy\.com\.de',
	362	)
	363
	364	def _initialize_consent(self):
	365	cookies = self._get_cookies('https://www.youtube.com/')
	366	if cookies.get('__Secure-3PSID'):
	367	return
	368	consent_id = None
	369	consent = cookies.get('CONSENT')
	370	if consent:
	371	if 'YES' in consent.value:
	372	return
	373	consent_id = self._search_regex(
	374	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	375	if not consent_id:
	376	consent_id = random.randint(100, 999)
	377	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	378
	379	def _initialize_pref(self):
	380	cookies = self._get_cookies('https://www.youtube.com/')
	381	pref_cookie = cookies.get('PREF')
	382	pref = {}
	383	if pref_cookie:
	384	try:
	385	pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
	386	except ValueError:
	387	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	388	pref.update({'hl': 'en', 'tz': 'UTC'})
	389	self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
	390
	391	def _real_initialize(self):
	392	self._initialize_pref()
	393	self._initialize_consent()
	394	self._check_login_required()
	395
	396	def _check_login_required(self):
	397	if self._LOGIN_REQUIRED and not self._cookies_passed:
	398	self.raise_login_required('Login details are needed to download this content', method='cookies')
	399
	400	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s=\s({.+})\s;'
	401	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s=\s({.+})\s*;'
	402	_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta\|</script\|\n)'
	403
	404	def _get_default_ytcfg(self, client='web'):
	405	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	406
	407	def _get_innertube_host(self, client='web'):
	408	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	409
	410	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	411	# try_get but with fallback to default ytcfg client values when present
	412	_func = lambda y: try_get(y, getter, expected_type)
	413	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	414
	415	def _extract_client_name(self, ytcfg, default_client='web'):
	416	return self._ytcfg_get_safe(
	417	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	418	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
	419
	420	def _extract_client_version(self, ytcfg, default_client='web'):
	421	return self._ytcfg_get_safe(
	422	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	423	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
	424
	425	def _extract_api_key(self, ytcfg=None, default_client='web'):
	426	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
	427
	428	def _extract_context(self, ytcfg=None, default_client='web'):
	429	context = get_first(
	430	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	431	# Enforce language and tz for extraction
	432	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	433	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	434	return context
	435
	436	_SAPISID = None
	437
	438	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	439	time_now = round(time.time())
	440	if self._SAPISID is None:
	441	yt_cookies = self._get_cookies('https://www.youtube.com')
	442	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	443	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	444	sapisid_cookie = dict_get(
	445	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	446	if sapisid_cookie and sapisid_cookie.value:
	447	self._SAPISID = sapisid_cookie.value
	448	self.write_debug('Extracted SAPISID cookie')
	449	# SAPISID cookie is required if not already present
	450	if not yt_cookies.get('SAPISID'):
	451	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	452	self._set_cookie(
	453	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	454	else:
	455	self._SAPISID = False
	456	if not self._SAPISID:
	457	return None
	458	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	459	sapisidhash = hashlib.sha1(
	460	f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
	461	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	462
	463	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	464	note='Downloading API JSON', errnote='Unable to download API page',
	465	context=None, api_key=None, api_hostname=None, default_client='web'):
	466
	467	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	468	data.update(query)
	469	real_headers = self.generate_api_headers(default_client=default_client)
	470	real_headers.update({'content-type': 'application/json'})
	471	if headers:
	472	real_headers.update(headers)
	473	return self._download_json(
	474	f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',
	475	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	476	data=json.dumps(data).encode('utf8'), headers=real_headers,
	477	query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
	478
	479	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	480	data = self._search_regex(
	481	(fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}',
	482	self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
	483	if data:
	484	return self._parse_json(data, item_id, fatal=fatal)
	485
	486	@staticmethod
	487	def _extract_session_index(*data):
	488	"""
	489	Index of current account in account list.
	490	See: https://github.com/yt-dlp/yt-dlp/pull/519
	491	"""
	492	for ytcfg in data:
	493	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	494	if session_index is not None:
	495	return session_index
	496
	497	# Deprecated?
	498	def _extract_identity_token(self, ytcfg=None, webpage=None):
	499	if ytcfg:
	500	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)

1

import base64

import calendar

import copy

import datetime

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

from .common import InfoExtractor, SearchInfoExtractor

18

from ..compat import functools

19

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

25

compat_urllib_parse_urlencode,

26

compat_urllib_parse_urlparse,

27

compat_urlparse,

28

)

29

from ..jsinterp import JSInterpreter

30

from ..utils import (

NO_DEFAULT,

ExtractorError,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicity requested by the user

74

INNERTUBE_CLIENTS = {

75

'web': {

76

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

77

'INNERTUBE_CONTEXT': {

78

'client': {

79

'clientName': 'WEB',

80

'clientVersion': '2.20211221.00.00',

81

}

82

},

83

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

84

},

85

'web_embedded': {

86

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

87

'INNERTUBE_CONTEXT': {

88

'client': {

89

'clientName': 'WEB_EMBEDDED_PLAYER',

90

'clientVersion': '1.20211215.00.01',

91

},

92

},

93

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

94

},

95

'web_music': {

96

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

97

'INNERTUBE_HOST': 'music.youtube.com',

98

'INNERTUBE_CONTEXT': {

99

'client': {

100

'clientName': 'WEB_REMIX',

101

'clientVersion': '1.20211213.00.00',

102

}

103

},

104

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

105

},

106

'web_creator': {

107

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

108

'INNERTUBE_CONTEXT': {

109

'client': {

110

'clientName': 'WEB_CREATOR',

111

'clientVersion': '1.20211220.02.00',

112

}

113

},

114

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

115

},

116

'android': {

117

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

118

'INNERTUBE_CONTEXT': {

119

'client': {

120

'clientName': 'ANDROID',

121

'clientVersion': '16.49',

122

}

123

},

124

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

125

'REQUIRE_JS_PLAYER': False

126

},

127

'android_embedded': {

128

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

129

'INNERTUBE_CONTEXT': {

130

'client': {

131

'clientName': 'ANDROID_EMBEDDED_PLAYER',

132

'clientVersion': '16.49',

133

},

134

},

135

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

136

'REQUIRE_JS_PLAYER': False

137

},

138

'android_music': {

139

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

140

'INNERTUBE_CONTEXT': {

141

'client': {

142

'clientName': 'ANDROID_MUSIC',

143

'clientVersion': '4.57',

144

}

145

},

146

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

147

'REQUIRE_JS_PLAYER': False

148

},

149

'android_creator': {

150

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

151

'INNERTUBE_CONTEXT': {

152

'client': {

153

'clientName': 'ANDROID_CREATOR',

154

'clientVersion': '21.47',

155

},

156

},

157

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

158

'REQUIRE_JS_PLAYER': False

159

},

160

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

161

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

162

'ios': {

163

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

164

'INNERTUBE_CONTEXT': {

165

'client': {

166

'clientName': 'IOS',

167

'clientVersion': '16.46',

168

'deviceModel': 'iPhone14,3',

169

}

170

},

171

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

172

'REQUIRE_JS_PLAYER': False

173

},

174

'ios_embedded': {

175

'INNERTUBE_CONTEXT': {

176

'client': {

177

'clientName': 'IOS_MESSAGES_EXTENSION',

178

'clientVersion': '16.46',

179

'deviceModel': 'iPhone14,3',

180

},

181

},

182

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

183

'REQUIRE_JS_PLAYER': False

184

},

185

'ios_music': {

186

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

187

'INNERTUBE_CONTEXT': {

188

'client': {

189

'clientName': 'IOS_MUSIC',

190

'clientVersion': '4.57',

191

},

192

},

193

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

194

'REQUIRE_JS_PLAYER': False

195

},

196

'ios_creator': {

197

'INNERTUBE_CONTEXT': {

198

'client': {

199

'clientName': 'IOS_CREATOR',

200

'clientVersion': '21.47',

201

},

202

},

203

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

204

'REQUIRE_JS_PLAYER': False

205

},

206

# mweb has 'ultralow' formats

207

# See: https://github.com/yt-dlp/yt-dlp/pull/557

208

'mweb': {

209

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

210

'INNERTUBE_CONTEXT': {

211

'client': {

212

'clientName': 'MWEB',

213

'clientVersion': '2.20211221.01.00',

214

}

215

},

216

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

217

},

218

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

219

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

220

'tv_embedded': {

221

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

222

'INNERTUBE_CONTEXT': {

223

'client': {

224

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

225

'clientVersion': '2.0',

226

},

227

},

228

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

234

variant, *base = client_name.rsplit('.', 1)

235

if base:

236

return variant, base[0], variant

237

base, *variant = client_name.split('_', 1)

238

return client_name, base, variant[0] if variant else None

239

240

241

def build_innertube_clients():

242

THIRD_PARTY = {

243

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

244

}

245

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

246

priority = qualities(BASE_CLIENTS[::-1])

247

248

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

249

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

250

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

251

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

252

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

253

254

_, base_client, variant = _split_innertube_client(client)

255

ytcfg['priority'] = 10 * priority(base_client)

256

257

if not variant:

258

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

259

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

260

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

261

embedscreen['priority'] -= 3

262

elif variant == 'embedded':

263

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

264

ytcfg['priority'] -= 2

265

else:

266

ytcfg['priority'] -= 3

267

268

269

build_innertube_clients()

270

271

272

class YoutubeBaseInfoExtractor(InfoExtractor):

273

"""Provide base functions for Youtube extractors"""

274

275

_RESERVED_NAMES = (

276

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

282

283

# _NETRC_MACHINE = 'youtube'

284

285

# If True it will raise an error if no login info is provided

286

_LOGIN_REQUIRED = False

287

288

_INVIDIOUS_SITES = (

289

# invidious-redirect websites

290

r'(?:www\.)?redirect\.invidious\.io',

291

r'(?:(?:www|dev)\.)?invidio\.us',

292

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

293

r'(?:www\.)?invidious\.pussthecat\.org',

294

r'(?:www\.)?invidious\.zee\.li',

295

r'(?:www\.)?invidious\.ethibox\.fr',

296

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

297

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

298

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

299

# youtube-dl invidious instances list

300

r'(?:(?:www|no)\.)?invidiou\.sh',

301

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

302

r'(?:www\.)?invidious\.kabi\.tk',

303

r'(?:www\.)?invidious\.mastodon\.host',

304

r'(?:www\.)?invidious\.zapashcanon\.fr',

305

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

306

r'(?:www\.)?invidious\.tinfoil-hat\.net',

307

r'(?:www\.)?invidious\.himiko\.cloud',

308

r'(?:www\.)?invidious\.reallyancient\.tech',

309

r'(?:www\.)?invidious\.tube',

310

r'(?:www\.)?invidiou\.site',

311

r'(?:www\.)?invidious\.site',

312

r'(?:www\.)?invidious\.xyz',

313

r'(?:www\.)?invidious\.nixnet\.xyz',

314

r'(?:www\.)?invidious\.048596\.xyz',

315

r'(?:www\.)?invidious\.drycat\.fr',

316

r'(?:www\.)?inv\.skyn3t\.in',

317

r'(?:www\.)?tube\.poal\.co',

318

r'(?:www\.)?tube\.connect\.cafe',

319

r'(?:www\.)?vid\.wxzm\.sx',

320

r'(?:www\.)?vid\.mint\.lgbt',

321

r'(?:www\.)?vid\.puffyan\.us',

322

r'(?:www\.)?yewtu\.be',

323

r'(?:www\.)?yt\.elukerio\.org',

324

r'(?:www\.)?yt\.lelux\.fi',

325

r'(?:www\.)?invidious\.ggc-project\.de',

326

r'(?:www\.)?yt\.maisputain\.ovh',

327

r'(?:www\.)?ytprivate\.com',

328

r'(?:www\.)?invidious\.13ad\.de',

329

r'(?:www\.)?invidious\.toot\.koeln',

330

r'(?:www\.)?invidious\.fdn\.fr',

331

r'(?:www\.)?watch\.nettohikari\.com',

332

r'(?:www\.)?invidious\.namazso\.eu',

333

r'(?:www\.)?invidious\.silkky\.cloud',

334

r'(?:www\.)?invidious\.exonip\.de',

335

r'(?:www\.)?invidious\.riverside\.rocks',

336

r'(?:www\.)?invidious\.blamefran\.net',

337

r'(?:www\.)?invidious\.moomoo\.de',

338

r'(?:www\.)?ytb\.trom\.tf',

339

r'(?:www\.)?yt\.cyberhost\.uk',

340

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

341

r'(?:www\.)?qklhadlycap4cnod\.onion',

342

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

343

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

344

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

345

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

346

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

347

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

348

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

349

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

350

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

351

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

352

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

353

r'(?:www\.)?piped\.kavin\.rocks',

354

r'(?:www\.)?piped\.silkky\.cloud',

355

r'(?:www\.)?piped\.tokhmi\.xyz',

356

r'(?:www\.)?piped\.moomoo\.me',

357

r'(?:www\.)?il\.ax',

358

r'(?:www\.)?piped\.syncpundit\.com',

359

r'(?:www\.)?piped\.mha\.fi',

360

r'(?:www\.)?piped\.mint\.lgbt',

361

r'(?:www\.)?piped\.privacy\.com\.de',

362

)

363

364

def _initialize_consent(self):

365

cookies = self._get_cookies('https://www.youtube.com/')

366

if cookies.get('__Secure-3PSID'):

367

return

368

consent_id = None

369

consent = cookies.get('CONSENT')

370

if consent:

371

if 'YES' in consent.value:

372

return

373

consent_id = self._search_regex(

374

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

375

if not consent_id:

376

consent_id = random.randint(100, 999)

377

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

378

379

def _initialize_pref(self):

380

cookies = self._get_cookies('https://www.youtube.com/')

381

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))

386

except ValueError:

387

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

388

pref.update({'hl': 'en', 'tz': 'UTC'})

389

self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))

390

391

def _real_initialize(self):

392

self._initialize_pref()

393

self._initialize_consent()

394

self._check_login_required()

395

396

def _check_login_required(self):

397

if self._LOGIN_REQUIRED and not self._cookies_passed:

398

self.raise_login_required('Login details are needed to download this content', method='cookies')

399

400

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+})\s*;'

401

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+})\s*;'

402

_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

403

404

def _get_default_ytcfg(self, client='web'):

405

return copy.deepcopy(INNERTUBE_CLIENTS[client])

406

407

def _get_innertube_host(self, client='web'):

408

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

409

410

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

411

# try_get but with fallback to default ytcfg client values when present

412

_func = lambda y: try_get(y, getter, expected_type)

413

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

414

415

def _extract_client_name(self, ytcfg, default_client='web'):

416

return self._ytcfg_get_safe(

417

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

418

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

419

420

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version string from `ytcfg` (or from the default client config).

    Looks at 'INNERTUBE_CLIENT_VERSION' first, then at the clientVersion
    nested in 'INNERTUBE_CONTEXT'.
    """
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)

424

425

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, else from the default config of `default_client`."""
    def api_key_of(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_of, compat_str, default_client)

427

428

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the 'INNERTUBE_CONTEXT' dict to send with API requests.

    The context is looked up in `ytcfg` first and in the default config of
    `default_client` second. English language and UTC time settings are then
    written into the dict obtained for its 'client' key.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    enforced = {'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0}
    traverse_obj(context, 'client', expected_type=dict, default={}).update(enforced)
    return context

_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

439

time_now = round(time.time())

440

if self._SAPISID is None:

441

yt_cookies = self._get_cookies('https://www.youtube.com')

442

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

443

# See: https://github.com/yt-dlp/yt-dlp/issues/393

444

sapisid_cookie = dict_get(

445

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

446

if sapisid_cookie and sapisid_cookie.value:

447

self._SAPISID = sapisid_cookie.value

448

self.write_debug('Extracted SAPISID cookie')

449

# SAPISID cookie is required if not already present

450

if not yt_cookies.get('SAPISID'):

451

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

452

self._set_cookie(

453

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

454

else:

455

self._SAPISID = False

456

if not self._SAPISID:

457

return None

458

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

459

sapisidhash = hashlib.sha1(

460

f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()

461

return f'SAPISIDHASH {time_now}_{sapisidhash}'

462

463

def _call_api(self, ep, query, video_id, fatal=True, headers=None,

464

note='Downloading API JSON', errnote='Unable to download API page',

465

context=None, api_key=None, api_hostname=None, default_client='web'):

466

467

data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}

468

data.update(query)

469

real_headers = self.generate_api_headers(default_client=default_client)

470

real_headers.update({'content-type': 'application/json'})

471

if headers:

472

real_headers.update(headers)

473

return self._download_json(

474

f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',

475

video_id=video_id, fatal=fatal, note=note, errnote=errnote,

476

data=json.dumps(data).encode('utf8'), headers=real_headers,

477

query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})

478

479

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find the ytInitialData assignment in `webpage` and return it parsed as JSON.

    The pattern followed by the statement boundary is tried first, the plain
    pattern second. Returns None when the search yields nothing.
    """
    patterns = (
        fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}',
        self._YT_INITIAL_DATA_RE,
    )
    raw = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw:
        return None
    return self._parse_json(raw, item_id, fatal=fatal)

485

486

@staticmethod

487

def _extract_session_index(*data):

488

"""

489

Index of current account in account list.

490

See: https://github.com/yt-dlp/yt-dlp/pull/519

491

"""

492

for ytcfg in data:

493

session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))

494

if session_index is not None:

return session_index

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):

499

if ytcfg:

500

token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)

if token:

return token

if webpage:

return self._search_regex(

505

r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,

506

'identity token', default=None, fatal=False)

507

508

@staticmethod

509

def _extract_account_syncid(*args):

510

"""

511

Extract syncId required to download private playlists of secondary channels

512

@params response and/or ytcfg

513

"""

514

for data in args:

515

# ytcfg includes channel_syncid if on secondary channel

516

delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)

if delegated_sid:

return delegated_sid

sync_ids = (try_get(

data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],

521

lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')

522

if len(sync_ids) >= 2 and sync_ids[1]:

523

# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel

524

# and just "user_syncid||" for primary channel. We only want the channel_syncid

return sync_ids[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Return the first visitorData string found in the given API responses / ytcfgs.
    Appears to be used to track session state
    """
    visitor_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_paths], expected_type=str)

536

537

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

540

541

def extract_ytcfg(self, video_id, webpage):
    """Return the object passed to the first `ytcfg.set({...});` call in `webpage`.

    Returns an empty dict when `webpage` is empty, when no such call is found
    or when its argument cannot be parsed as JSON.
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped literals; the previous
    # pattern had `$` anchors in their place and therefore never matched.
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_config, video_id, fatal=False) or {}

548

549

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an innertube API request.

    Values not passed explicitly are taken from `ytcfg`, with client name and
    version falling back to the defaults of `default_client`. Headers whose
    value is None are left out of the returned dict. 'Authorization' and
    'X-Origin' are only added when a SAPISIDHASH header can be generated.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(default_client))
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With an account sync id but no known session index, use index 0
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    return {name: value for name, value in headers.items() if value is not None}

573

574

def _download_ytcfg(self, client, video_id):

575

url = {

576

'web': 'https://www.youtube.com',

577

'web_music': 'https://music.youtube.com',

578

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(

583

url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')

584

return self.extract_ytcfg(video_id, webpage) or {}

585

586

@staticmethod

587

def _build_api_continuation_query(continuation, ctp=None):

588

query = {

589

'continuation': continuation

590

}

591

# TODO: Inconsistency with clickTrackingParams.

592

# Currently we have a fixed ctp contained within context (from ytcfg)

593

# and a ctp in root query for continuation.

594

if ctp:

595

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from a renderer's next/reload continuation data.

    Returns None when the renderer carries no such data or no token.
    """
    data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = data.get('continuation') if data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, data.get('clickTrackingParams'))

610

611

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or has no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

620

621

@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for `renderer`, or None if there is none.

    The renderer's own next/reload continuation data is preferred; otherwise
    its 'contents' and 'items' lists are scanned for a continuationItemRenderer.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []
    ]

    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(
            entry, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                    lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
            dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query

@classmethod
def _extract_alerts(cls, data):
    """Yield `(alert_type, message)` for every alert in `data['alerts']`.

    Alerts without a type or without text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if isinstance(alert_dict, dict):
            for alert in alert_dict.values():
                alert_type = alert.get('type')
                message = cls._get_text(alert, 'text') if alert_type else None
                if message:
                    yield alert_type, message

654

655

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

656

errors = []

657

warnings = []

658

for alert_type, alert_message in alerts:

659

if alert_type.lower() == 'error' and fatal:

660

errors.append([alert_type, alert_message])

661

else:

662

warnings.append([alert_type, alert_message])

663

664

for alert_type, alert_message in (warnings + errors[:-1]):

665

self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

666

if errors:

667

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

668

669

def _extract_and_report_alerts(self, data, *args, **kwargs):

670

return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

671

672

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadata badge labels in `renderer['badges']`."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found under any of the given paths.

    For each object found, its 'simpleText' is used if present; otherwise the
    'text' values of its 'runs' (or of the object itself when it is a list)
    are joined, limited to the first `max_runs` runs. With no paths, `data`
    itself is inspected. Returns None when nothing yields text.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            branching = any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                # A path without `...` or alternative keys gives a single object
                candidates = [candidates]
        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined

def _get_count(self, data, *path_list):
    """Return the count parsed from the text found at `path_list` in `data`.

    parse_count() is tried first; if it gives None, the leading run of digits
    and commas (after removing whitespace) is converted with str_to_int().
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)
    return str_to_int(leading_digits)

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Collect thumbnail dicts ('url', 'height', 'width') from thumbnails lists
    @param path_list: path list to level that contains 'thumbnails' key
    """
    collected = []
    for path in path_list or [()]:
        for entry in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
            thumbnail_url = url_or_none(entry.get('url'))
            if thumbnail_url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in thumbnail_url:
                    thumbnail_url = thumbnail_url.partition('?')[0]
                collected.append({
                    'url': thumbnail_url,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return collected

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
    @returns the result of datetime_from_str(), or None when the text holds
             no relative time or the time cannot be converted
    """
    # `mobj` was previously read without ever being assigned; search for either
    # a keyword (today/yesterday/now) or an "<amount> <unit>(s) ago" phrase
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if mobj:
        start = mobj.group('start')
        if start:
            return datetime_from_str(start)
        try:
            return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
        except ValueError:
            return None
    return None

def _extract_time_text(self, renderer, *path_list):
    """Parse the time text found at `path_list` in `renderer`.

    The text is first read as a relative time, then as an absolute date, and
    finally a date-looking part of it is tried. A warning is reported when
    non-empty text cannot be parsed.

    @returns (timestamp, time_text)
    """
    text = self._get_text(renderer, *path_list) or ''
    dt = self.extract_relative_time(text)
    timestamp = calendar.timegm(dt.timetuple()) if isinstance(dt, datetime.datetime) else None

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_part = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text

768

769

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the innertube API with retries and return the response.

    A request is retried (up to the 'extractor_retries' param, default 3) on
    network errors other than HTTP 403/429, on alerts containing
    'unknown error', and when the response has none of `check_get_keys`.
    Once retrying is no longer possible the error is raised if `fatal`,
    otherwise it is reported as a warning and None is returned.
    """
    if check_get_keys is None:
        check_get_keys = []
    max_retries = self.get_param('extractor_retries', 3)
    response = None
    last_error = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % attempt if attempt else ''))
        except ExtractorError as e:
            cause = e.cause
            is_http_error = isinstance(cause, compat_HTTPError)
            if isinstance(cause, network_exceptions):
                if is_http_error:
                    first_bytes = cause.read(512)
                    if not is_html(first_bytes):
                        # A non-HTML error body may carry a JSON error message
                        error_body = self._webpage_read_content(cause, None, item_id, prefix=first_bytes) or '{}'
                        yt_error = try_get(
                            self._parse_json(error_body, item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not is_http_error or cause.code not in (403, 429):
                    last_error = error_to_compat_str(cause or e.msg)
                    if attempt < max_retries:
                        continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                if 'unknown error' in e.msg.lower():
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if attempt >= max_retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                return
    return response

@staticmethod
def is_music_url(url):
    """Return True when `url` starts with http(s)://music.youtube.com/."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))

844

845

def _extract_video(self, renderer):
    """Build a `url`-type result for YoutubeIE from a video renderer dict.

    The metadata is read from the renderer only. The URL points at /shorts/
    when the overlay style or the navigation URL marks the video as a short.
    'upload_date' is only filled in when the 'approximate_date' extractor
    argument of 'youtubetab' is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
                        if self._configuration_arg('approximate_date', ie_key='youtubetab')
                        else None),
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

902

IE_DESC = 'YouTube'

903

_VALID_URL = r"""(?x)^

904

(

905

(?:https?://|//) # http(s):// or protocol-independent URL

906

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

907

(?:www\.)?deturl\.com/www\.youtube\.com|

908

(?:www\.)?pwnyoutube\.com|

909

(?:www\.)?hooktube\.com|

910

(?:www\.)?yourepeat\.com|

911

tube\.majestyc\.net|

912

%(invidious)s|

913

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

914

(?:.*?\#/)? # handle anchor (#/) redirect urls

915

(?: # the various things that can precede the ID:

916

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

917

|(?: # or the v= param in all its forms

918

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

919

(?:\?|\#!?) # the params delimiter ? or # or #!

920

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

926

vid\.plus| # or vid.plus/xxxx

927

zwearz\.com/watch| # or zwearz.com/watch/xxxx

928

%(invidious)s

929

)/

930

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

931

)

932

)? # all until now is optional -> you can pass the naked ID

933

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

934

(?(1).+)? # if we found the ID, everything can follow

935

(?:\#|$)""" % {

936

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

937

}

938

_PLAYER_INFO_RE = (

939

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

940

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

941

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

942

)

943

_formats = {

944

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

945

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

946

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

947

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

948

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

949

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

950

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

951

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

952

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

953

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

954

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

955

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

956

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

957

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

958

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

959

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

960

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

961

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

966

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

967

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

968

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

969

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

970

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

971

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

972

973

# Apple HTTP Live Streaming

974

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

975

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

976

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

977

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

978

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

979

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

980

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

981

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

982

983

# DASH mp4 video

984

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

985

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

986

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

987

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

988

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

989

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

990

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

991

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

992

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

993

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

994

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

995

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

996

997

# Dash mp4 audio

998

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

999

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

1000

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

1001

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1002

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1003

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

1004

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

1005

1006

# Dash webm

1007

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1008

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1009

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1010

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1011

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1012

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1013

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1014

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1015

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1016

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1017

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1018

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1019

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1020

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1021

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1022

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1023

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1024

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1025

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1026

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1027

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1028

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1029

1030

# Dash webm audio

1031

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1032

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1033

1034

# Dash webm audio with opus inside

1035

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1036

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1037

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1038

1039

# RTMP (unnamed)

1040

'_rtmp': {'protocol': 'rtmp'},

1041

1042

# av01 video only formats sometimes served with "unknown" codecs

1043

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1044

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1045

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1046

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1047

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1048

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1049

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1050

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1051

}

1052

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1064

'uploader': 'Philipp Hagemeister',

1065

'uploader_id': 'phihag',

1066

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1067

'channel': 'Philipp Hagemeister',

1068

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1069

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1070

'upload_date': '20121002',

1071

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1072

'categories': ['Science & Technology'],

1073

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1078

'playable_in_embed': True,

1079

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1080

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1089

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1094

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1095

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1096

'uploader': 'SET India',

1097

'uploader_id': 'setindia',

1098

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1099

'age_limit': 18,

1100

},

1101

'skip': 'Private video',

1102

},

1103

{

1104

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1105

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1110

'uploader': 'Philipp Hagemeister',

1111

'uploader_id': 'phihag',

1112

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1113

'channel': 'Philipp Hagemeister',

1114

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1115

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1116

'upload_date': '20121002',

1117

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1118

'categories': ['Science & Technology'],

1119

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1124

'playable_in_embed': True,

1125

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1126

'live_status': 'not_live',

1127

'age_limit': 0,

1128

'channel_follower_count': int

1129

},

1130

'params': {

1131

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1136

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1141

'uploader_id': '8KVIDEO',

1142

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1143

'description': '',

1144

'uploader': '8KVIDEO',

1145

'title': 'UHDTV TEST 8K VIDEO.mp4'

1146

},

1147

'params': {

1148

'youtube_include_dash_manifest': True,

1149

'format': '141',

1150

},

1151

'skip': 'format 141 not served anymore',

1152

},

1153

# DASH manifest with encrypted signature

1154

{

1155

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1160

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1161

'duration': 244,

1162

'uploader': 'AfrojackVEVO',

1163

'uploader_id': 'AfrojackVEVO',

1164

'upload_date': '20131011',

1165

'abr': 129.495,

1166

'like_count': int,

1167

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1168

'playable_in_embed': True,

1169

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1170

'view_count': int,

1171

'track': 'The Spark',

1172

'live_status': 'not_live',

1173

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1174

'channel': 'Afrojack',

1175

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1176

'tags': 'count:19',

1177

'availability': 'public',

1178

'categories': ['Music'],

1179

'age_limit': 0,

1180

'alt_title': 'The Spark',

1181

'channel_follower_count': int

1182

},

1183

'params': {

1184

'youtube_include_dash_manifest': True,

1185

'format': '141/bestaudio[ext=m4a]',

1186

},

1187

},

1188

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1189

{

1190

'note': 'Embed allowed age-gate video',

1191

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1196

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1197

'duration': 142,

1198

'uploader': 'The Witcher',

1199

'uploader_id': 'WitcherGame',

1200

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1201

'upload_date': '20140605',

1202

'age_limit': 18,

1203

'categories': ['Gaming'],

1204

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1205

'availability': 'needs_auth',

1206

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1207

'like_count': int,

1208

'channel': 'The Witcher',

1209

'live_status': 'not_live',

1210

'tags': 'count:17',

1211

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1212

'playable_in_embed': True,

1213

'view_count': int,

1214

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1219

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1224

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1225

'upload_date': '20200408',

1226

'uploader_id': 'FlyingKitty900',

1227

'uploader': 'FlyingKitty',

1228

'age_limit': 18,

1229

'availability': 'needs_auth',

1230

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1231

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1232

'channel': 'FlyingKitty',

1233

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1234

'view_count': int,

1235

'categories': ['Entertainment'],

1236

'live_status': 'not_live',

1237

'tags': ['Flyingkitty', 'godzilla 2'],

1238

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1239

'like_count': int,

1240

'duration': 177,

1241

'playable_in_embed': True,

1242

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1247

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1248

'info_dict': {

1249

'id': 'Tq92D6wQ1mg',

1250

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1251

'ext': 'mp4',

1252

'upload_date': '20191228',

1253

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1254

'uploader': 'Projekt Melody',

1255

'description': 'md5:17eccca93a786d51bc67646756894066',

1256

'age_limit': 18,

1257

'like_count': int,

1258

'availability': 'needs_auth',

1259

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1260

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1261

'view_count': int,

1262

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1263

'channel': 'Projekt Melody',

1264

'live_status': 'not_live',

1265

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1266

'playable_in_embed': True,

1267

'categories': ['Entertainment'],

1268

'duration': 106,

1269

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1270

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1275

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1280

'uploader': 'Herr Lurik',

1281

'uploader_id': 'st3in234',

1282

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1283

'upload_date': '20130730',

1284

'track': 'Such mich find mich',

1285

'age_limit': 0,

1286

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1287

'like_count': int,

1288

'playable_in_embed': False,

1289

'creator': 'OOMPH!',

1290

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1291

'view_count': int,

1292

'alt_title': 'Such mich find mich',

1293

'duration': 210,

1294

'channel': 'Herr Lurik',

1295

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1296

'categories': ['Music'],

1297

'availability': 'public',

1298

'uploader_url': 'http://www.youtube.com/user/st3in234',

1299

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1300

'live_status': 'not_live',

1301

'artist': 'OOMPH!',

1302

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1307

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1308

'only_matching': True,

1309

},

1310

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1311

# YouTube Red ad is not captured for creator

1312

{

1313

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1319

'uploader_id': 'deadmau5',

1320

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1321

'creator': 'deadmau5',

1322

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1323

'uploader': 'deadmau5',

1324

'title': 'Deadmau5 - Some Chords (HD)',

1325

'alt_title': 'Some Chords',

1326

'availability': 'public',

1327

'tags': 'count:14',

1328

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1329

'view_count': int,

1330

'live_status': 'not_live',

1331

'channel': 'deadmau5',

1332

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1333

'like_count': int,

1334

'track': 'Some Chords',

1335

'artist': 'deadmau5',

1336

'playable_in_embed': True,

1337

'age_limit': 0,

1338

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1339

'categories': ['Music'],

1340

'album': 'Some Chords',

1341

'channel_follower_count': int

1342

},

1343

'expected_warnings': [

1344

'DASH manifest missing',

1345

]

1346

},

1347

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1348

{

1349

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1355

'uploader_id': 'olympic',

1356

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1357

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1358

'uploader': 'Olympics',

1359

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1360

'like_count': int,

1361

'release_timestamp': 1343767800,

1362

'playable_in_embed': True,

1363

'categories': ['Sports'],

1364

'release_date': '20120731',

1365

'channel': 'Olympics',

1366

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1367

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1368

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1369

'age_limit': 0,

1370

'availability': 'public',

1371

'live_status': 'was_live',

1372

'view_count': int,

1373

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1374

'channel_follower_count': int

1375

},

1376

'params': {

1377

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1387

'duration': 85,

1388

'upload_date': '20110310',

1389

'uploader_id': 'AllenMeow',

1390

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1391

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1392

'uploader': '孫ᄋᄅ',

1393

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1394

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1399

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1400

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1401

'view_count': int,

1402

'categories': ['People & Blogs'],

1403

'like_count': int,

1404

'live_status': 'not_live',

1405

'availability': 'unlisted',

1406

'channel_follower_count': int

1407

},

1408

},

1409

# url_encoded_fmt_stream_map is empty string

1410

{

1411

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1416

'description': '',

1417

'upload_date': '20150404',

1418

'uploader_id': 'spbelect',

1419

'uploader': 'Наблюдатели Петербурга',

1420

},

1421

'params': {

1422

'skip_download': 'requires avconv',

1423

},

1424

'skip': 'This live event has ended.',

1425

},

1426

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1427

{

1428

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1433

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1434

'duration': 220,

1435

'upload_date': '20150625',

1436

'uploader_id': 'dorappi2000',

1437

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1438

'uploader': 'dorappi2000',

1439

'formats': 'mincount:31',

1440

},

1441

'skip': 'not actual anymore',

1442

},

1443

# DASH manifest with segment_list

1444

{

1445

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1446

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1451

'uploader': 'Airtek',

1452

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1453

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1454

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1455

},

1456

'params': {

1457

'youtube_include_dash_manifest': True,

1458

'format': '135', # bestvideo

1459

},

1460

'skip': 'This live event has ended.',

1461

},

1462

{

1463

# Multifeed videos (multiple cameras), URL is for Main Camera

1464

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1465

'info_dict': {

1466

'id': 'jvGDaLqkpTg',

1467

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1468

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1475

'description': 'md5:e03b909557865076822aa169218d6a5d',

1476

'duration': 10643,

1477

'upload_date': '20161111',

1478

'uploader': 'Team PGP',

1479

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1480

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1487

'description': 'md5:e03b909557865076822aa169218d6a5d',

1488

'duration': 10991,

1489

'upload_date': '20161111',

1490

'uploader': 'Team PGP',

1491

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1492

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1499

'description': 'md5:e03b909557865076822aa169218d6a5d',

1500

'duration': 10995,

1501

'upload_date': '20161111',

1502

'uploader': 'Team PGP',

1503

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1504

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1511

'description': 'md5:e03b909557865076822aa169218d6a5d',

1512

'duration': 10990,

1513

'upload_date': '20161111',

1514

'uploader': 'Team PGP',

1515

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1516

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1521

},

1522

'skip': 'Not multifeed anymore',

1523

},

1524

{

1525

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1526

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1527

'info_dict': {

1528

'id': 'gVfLd0zydlo',

1529

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1530

},

1531

'playlist_count': 2,

1532

'skip': 'Not multifeed anymore',

1533

},

1534

{

1535

'url': 'https://vid.plus/FlRa-iH7PGw',

1536

'only_matching': True,

1537

},

1538

{

1539

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1540

'only_matching': True,

1541

},

1542

{

1543

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1544

# Also tests cut-off URL expansion in video description (see

1545

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1546

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1547

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1552

'alt_title': 'Dark Walk',

1553

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1554

'duration': 133,

1555

'upload_date': '20151119',

1556

'uploader_id': 'IronSoulElf',

1557

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1558

'uploader': 'IronSoulElf',

1559

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1560

'track': 'Dark Walk',

1561

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1562

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1563

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1564

'categories': ['Film & Animation'],

1565

'view_count': int,

1566

'live_status': 'not_live',

1567

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1568

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1569

'tags': 'count:13',

1570

'availability': 'public',

1571

'channel': 'IronSoulElf',

1572

'playable_in_embed': True,

1573

'like_count': int,

1574

'age_limit': 0,

1575

'channel_follower_count': int

1576

},

1577

'params': {

1578

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1583

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1584

'only_matching': True,

1585

},

1586

{

1587

# Video with yt:stretch=17:0

1588

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1593

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1594

'upload_date': '20151107',

1595

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1596

'uploader': 'CH GAMER DROID',

1597

},

1598

'params': {

1599

'skip_download': True,

1600

},

1601

'skip': 'This video does not exist.',

1602

},

1603

{

1604

# Video with incomplete 'yt:stretch=16:'

1605

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1606

'only_matching': True,

1607

},

1608

{

1609

# Video licensed under Creative Commons

1610

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1615

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1616

'duration': 721,

1617

'upload_date': '20150128',

1618

'uploader_id': 'BerkmanCenter',

1619

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1620

'uploader': 'The Berkman Klein Center for Internet & Society',

1621

'license': 'Creative Commons Attribution license (reuse allowed)',

1622

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1623

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1624

'like_count': int,

1625

'age_limit': 0,

1626

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1627

'channel': 'The Berkman Klein Center for Internet & Society',

1628

'availability': 'public',

1629

'view_count': int,

1630

'categories': ['Education'],

1631

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1632

'live_status': 'not_live',

1633

'playable_in_embed': True,

1634

'channel_follower_count': int

1635

},

1636

'params': {

1637

'skip_download': True,

},

},

{

# Channel-like uploader_url

1642

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1647

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1648

'duration': 4060,

1649

'upload_date': '20151120',

1650

'uploader': 'Bernie Sanders',

1651

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1652

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1653

'license': 'Creative Commons Attribution license (reuse allowed)',

1654

'playable_in_embed': True,

1655

'tags': 'count:12',

1656

'like_count': int,

1657

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1658

'age_limit': 0,

1659

'availability': 'public',

1660

'categories': ['News & Politics'],

1661

'channel': 'Bernie Sanders',

1662

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1663

'view_count': int,

1664

'live_status': 'not_live',

1665

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1666

'channel_follower_count': int

1667

},

1668

'params': {

1669

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1674

'only_matching': True,

1675

},

1676

{

1677

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1678

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1679

'only_matching': True,

1680

},

1681

{

1682

# Rental video preview

1683

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1688

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1689

'upload_date': '20150811',

1690

'uploader': 'FlixMatrix',

1691

'uploader_id': 'FlixMatrixKaravan',

1692

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1693

'license': 'Standard YouTube License',

1694

},

1695

'params': {

1696

'skip_download': True,

1697

},

1698

'skip': 'This video is not available.',

1699

},

1700

{

1701

# YouTube Red video with episode data

1702

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1707

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1708

'duration': 2085,

1709

'upload_date': '20170118',

1710

'uploader': 'Vsauce',

1711

'uploader_id': 'Vsauce',

1712

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1713

'series': 'Mind Field',

1714

'season_number': 1,

1715

'episode_number': 1,

1716

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1717

'tags': 'count:12',

1718

'view_count': int,

1719

'availability': 'public',

1720

'age_limit': 0,

1721

'channel': 'Vsauce',

1722

'episode': 'Episode 1',

1723

'categories': ['Entertainment'],

1724

'season': 'Season 1',

1725

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1726

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1727

'like_count': int,

1728

'playable_in_embed': True,

1729

'live_status': 'not_live',

1730

'channel_follower_count': int

1731

},

1732

'params': {

1733

'skip_download': True,

1734

},

1735

'expected_warnings': [

1736

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1741

# as inappropriate or offensive to some audiences.

1742

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1747

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1748

'duration': 965,

1749

'upload_date': '20140124',

1750

'uploader': 'New Century Foundation',

1751

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1752

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1753

},

1754

'params': {

1755

'skip_download': True,

1756

},

1757

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1762

'only_matching': True,

1763

},

1764

{

1765

# geo restricted to JP

1766

'url': 'sJL6WA-aGkQ',

1767

'only_matching': True,

1768

},

1769

{

1770

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1771

'only_matching': True,

1772

},

1773

{

1774

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1775

'only_matching': True,

1776

},

1777

{

1778

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1779

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1780

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1785

'only_matching': True,

1786

},

1787

{

1788

# Video with unsupported adaptive stream type formats

1789

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1794

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1795

'duration': 433,

1796

'upload_date': '20130923',

1797

'uploader': 'Amelia Putri Harwita',

1798

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1799

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1800

'formats': 'maxcount:10',

1801

},

1802

'params': {

1803

'skip_download': True,

1804

'youtube_include_dash_manifest': False,

1805

},

1806

'skip': 'not actual anymore',

1807

},

1808

{

1809

# YouTube Music auto-generated description

1810

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1815

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1816

'upload_date': '20190312',

1817

'uploader': 'Stephen - Topic',

1818

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1819

'artist': 'Stephen',

1820

'track': 'Voyeur Girl',

1821

'album': 'it\'s too much love to know my dear',

1822

'release_date': '20190313',

1823

'release_year': 2019,

1824

'alt_title': 'Voyeur Girl',

1825

'view_count': int,

1826

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1827

'playable_in_embed': True,

1828

'like_count': int,

1829

'categories': ['Music'],

1830

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1831

'channel': 'Stephen',

1832

'availability': 'public',

1833

'creator': 'Stephen',

1834

'duration': 169,

1835

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1836

'age_limit': 0,

1837

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1838

'tags': 'count:11',

1839

'live_status': 'not_live',

1840

'channel_follower_count': int

1841

},

1842

'params': {

1843

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1848

'only_matching': True,

1849

},

1850

{

1851

# invalid -> valid video id redirection

1852

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1857

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1858

'upload_date': '20090125',

1859

'uploader': 'Prochorowka',

1860

'uploader_id': 'Prochorowka',

1861

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1862

'artist': 'Panjabi MC',

1863

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1864

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1865

},

1866

'params': {

1867

'skip_download': True,

1868

},

1869

'skip': 'Video unavailable',

1870

},

1871

{

1872

# empty description results in an empty string

1873

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1880

'uploader_id': 'ElevageOrVert',

1881

'uploader': 'ElevageOrVert',

1882

'view_count': int,

1883

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1884

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1885

'like_count': int,

1886

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1887

'tags': [],

1888

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1889

'availability': 'public',

1890

'age_limit': 0,

1891

'categories': ['Pets & Animals'],

1892

'duration': 7,

1893

'playable_in_embed': True,

1894

'live_status': 'not_live',

1895

'channel': 'ElevageOrVert',

1896

'channel_follower_count': int

1897

},

1898

'params': {

1899

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1904

# see [2] for an example with '};' inside ytInitialPlayerResponse

1905

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1906

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1907

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1912

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1913

'upload_date': '20130831',

1914

'uploader_id': 'kudvenkat',

1915

'uploader': 'kudvenkat',

1916

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1917

'like_count': int,

1918

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1919

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1920

'live_status': 'not_live',

1921

'categories': ['Education'],

1922

'availability': 'public',

1923

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1924

'tags': 'count:12',

1925

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1930

'channel_follower_count': int

1931

},

1932

'params': {

1933

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1938

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1939

'only_matching': True,

1940

},

1941

{

1942

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1943

'only_matching': True,

1944

},

1945

{

1946

# https://github.com/ytdl-org/youtube-dl/pull/28094

1947

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1953

'upload_date': '20141120',

1954

'uploader': 'The Cinematic Orchestra - Topic',

1955

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1956

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1957

'artist': 'The Cinematic Orchestra',

1958

'track': 'Burn Out',

1959

'album': 'Every Day',

1960

'like_count': int,

1961

'live_status': 'not_live',

1962

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1967

'creator': 'The Cinematic Orchestra',

1968

'channel': 'The Cinematic Orchestra',

1969

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1970

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1971

'availability': 'public',

1972

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1973

'categories': ['Music'],

1974

'playable_in_embed': True,

1975

'channel_follower_count': int

1976

},

1977

'params': {

1978

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1983

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1984

'only_matching': True,

1985

},

1986

{

1987

# controversial video, requires bpctr/contentCheckOk

1988

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1993

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1994

'uploader': 'CBS Mornings',

1995

'uploader_id': 'CBSThisMorning',

1996

'upload_date': '20140716',

1997

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1998

'duration': 170,

1999

'categories': ['News & Politics'],

2000

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2001

'view_count': int,

2002

'channel': 'CBS Mornings',

2003

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2004

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2005

'age_limit': 18,

2006

'availability': 'needs_auth',

2007

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2008

'like_count': int,

2009

'live_status': 'not_live',

2010

'playable_in_embed': True,

2011

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2016

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2021

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2022

'upload_date': '20201120',

2023

'uploader': 'Walk around Japan',

2024

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2025

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2026

'duration': 1456,

2027

'categories': ['Travel & Events'],

2028

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2029

'view_count': int,

2030

'channel': 'Walk around Japan',

2031

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2032

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2033

'age_limit': 0,

2034

'availability': 'public',

2035

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2036

'live_status': 'not_live',

2037

'playable_in_embed': True,

2038

'channel_follower_count': int

2039

},

2040

'params': {

2041

'skip_download': True,

2042

},

2043

}, {

2044

# Has multiple audio streams

2045

'url': 'WaOKSUlf4TM',

2046

'only_matching': True

2047

}, {

2048

# Requires Premium: has format 141 when requested using YTM url

2049

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2050

'only_matching': True

2051

}, {

2052

# multiple subtitles with same lang_code

2053

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2054

'only_matching': True,

2055

}, {

2056

# Force use android client fallback

2057

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2058

'info_dict': {

2059

'id': 'YOelRv7fMxY',

2060

'title': 'DIGGING A SECRET TUNNEL Part 1',

2061

'ext': '3gp',

2062

'upload_date': '20210624',

2063

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2064

'uploader': 'colinfurze',

2065

'uploader_id': 'colinfurze',

2066

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2067

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2068

'duration': 596,

2069

'categories': ['Entertainment'],

2070

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2071

'view_count': int,

2072

'channel': 'colinfurze',

2073

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2074

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2075

'age_limit': 0,

2076

'availability': 'public',

2077

'like_count': int,

2078

'live_status': 'not_live',

2079

'playable_in_embed': True,

2080

'channel_follower_count': int

2081

},

2082

'params': {

2083

'format': '17', # 3gp format available on android

2084

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2089

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2090

'only_matching': True,

2091

'params': {

2092

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2097

'only_matching': True,

2098

}, {

2099

'note': 'Storyboards',

2100

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2106

'uploader_id': 'scishow',

2107

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2108

'upload_date': '20140324',

2109

'uploader': 'SciShow',

2110

'like_count': int,

2111

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2112

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2113

'view_count': int,

2114

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2115

'playable_in_embed': True,

2116

'tags': 'count:12',

2117

'uploader_url': 'http://www.youtube.com/user/scishow',

2118

'availability': 'public',

2119

'channel': 'SciShow',

2120

'live_status': 'not_live',

2121

'duration': 248,

2122

'categories': ['Education'],

2123

'age_limit': 0,

2124

'channel_follower_count': int

2125

}, 'params': {'format': 'mhtml', 'skip_download': True}

2126

}, {

2127

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2128

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2133

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2134

'uploader': 'Leon Nguyen',

2135

'uploader_id': 'VNSXIII',

2136

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2137

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2138

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2143

'tags': 'count:23',

2144

'playable_in_embed': True,

2145

'live_status': 'not_live',

2146

'upload_date': '20220103',

2147

'like_count': int,

2148

'availability': 'public',

2149

'channel': 'Leon Nguyen',

2150

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2151

'channel_follower_count': int

2152

}

2153

}, {

2154

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2155

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2160

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2161

'uploader': 'Quackity',

2162

'uploader_id': 'QuackityHQ',

2163

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2164

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2165

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2170

'tags': 'count:26',

2171

'playable_in_embed': True,

2172

'live_status': 'not_live',

2173

'release_timestamp': 1641172509,

2174

'release_date': '20220103',

2175

'upload_date': '20220103',

2176

'like_count': int,

2177

'availability': 'public',

2178

'channel': 'Quackity',

2179

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2180

'channel_follower_count': int

2181

}

2182

},

2183

{ # continuous livestream. Microformat upload date should be preferred.

2184

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2185

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2186

'info_dict': {

2187

'id': 'kgx4WGK0oNU',

2188

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2189

'ext': 'mp4',

2190

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2191

'availability': 'public',

2192

'age_limit': 0,

2193

'release_timestamp': 1637975704,

2194

'upload_date': '20210619',

2195

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2196

'live_status': 'is_live',

2197

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2198

'uploader': '阿鲍Abao',

2199

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2200

'channel': 'Abao in Tokyo',

2201

'channel_follower_count': int,

2202

'release_date': '20211127',

2203

'tags': 'count:39',

2204

'categories': ['People & Blogs'],

2205

'like_count': int,

2206

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2207

'view_count': int,

2208

'playable_in_embed': True,

2209

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2210

},

2211

'params': {'skip_download': True}

2212

}, {

2213

# Story. Requires specific player params to work.

2214

# Note: stories get removed after some period of time

2215

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2220

'view_count': int,

2221

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2222

'upload_date': '20220526',

2223

'categories': ['Education'],

2224

'title': 'Story',

2225

'channel': 'IT\'S HISTORY',

2226

'description': '',

2227

'uploader_id': 'BlastfromthePast',

2228

'duration': 12,

2229

'uploader': 'IT\'S HISTORY',

2230

'playable_in_embed': True,

2231

'age_limit': 0,

2232

'live_status': 'not_live',

2233

'tags': [],

2234

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2235

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2236

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2237

}

2238

}, {

2239

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2244

'upload_date': '20220323',

2245

'like_count': int,

2246

'availability': 'unlisted',

2247

'channel': 'nao20010128nao',

2248

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2249

'age_limit': 0,

2250

'uploader': 'nao20010128nao',

2251

'uploader_id': 'nao20010128nao',

2252

'categories': ['Music'],

2253

'view_count': int,

2254

'description': '',

2255

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2256

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2257

'live_status': 'not_live',

2258

'playable_in_embed': True,

2259

'channel_follower_count': int,

2260

'duration': 6,

2261

'tags': [],

2262

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

}

}

]

@classmethod
def suitable(cls, url):
    """Reject URLs carrying a non-empty `list` query value; otherwise defer to the parent class."""
    from ..utils import parse_qs

    first_list_value = parse_qs(url).get('list', [None])[0]
    if first_list_value:
        return False
    return super().suitable(url)

2275

2276

def __init__(self, *args, **kwargs):
    """Run the parent initialiser, then create the two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _player_cache holds memoised signature/nsig functions and decrypted values;
    # _code_cache maps a player id to its downloaded JS (see _load_player).
    self._player_cache = {}
    self._code_cache = {}

2280

2281

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """
    Mark every `is_from_start` format as a live, generator-driven DASH download.

    Each selected format dict is mutated in place: `is_live` is set, `protocol`
    becomes 'http_dash_segments_generator' and `fragments` becomes a partial of
    `_live_dash_fragments` bound to the format id, `live_start_time` and a
    manifest feed callback shared by all formats.
    """
    # Serialises manifest refetches requested by the feed callbacks
    lock = threading.Lock()

    is_live = True
    # Time of the last (re)fetch of the format list
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download the player responses and rebuild `formats` once `delay` seconds have passed."""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with lock:
            refetch_manifest(format_id, delay)

        f = next((f for f in formats if f['format_id'] == format_id), None)
        if not f:
            # The format vanished from the refreshed list: either the stream ended or the lookup failed
            if not is_live:
                self.to_screen(f'{video_id}: Video is no longer live')
            else:
                self.report_warning(
                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
            return None
        return f['manifest_url'], f['manifest_stream_number'], is_live

    for f in formats:
        f['is_live'] = True
        f['protocol'] = 'http_dash_segments_generator'
        f['fragments'] = functools.partial(
            self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)

2324

2325

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """
    Generator yielding `{'url': ...}` fragment dicts for a live DASH format.

    @param format_id        id of the format whose manifest is requested from `mpd_feed`
    @param live_start_time  start time of the live stream (may be falsy)
    @param mpd_feed         callable(format_id, delay) returning
                            (manifest_url, manifest_stream_number, is_live) or None
    @param ctx              dict; 'start' is read and 'last_error' is popped from it
    """
    # FETCH_SPAN: minimum seconds between two polling iterations
    # MAX_DURATION: 432000 seconds, i.e. the 120 hours mentioned in the warning below
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
        lack_early_segments = True

    # known_idx: next sequence number to yield
    # no_fragment_score: grows on failures/idle polls, reset when new fragments are yielded
    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """Refresh the manifest state; returns (should_continue, last sequence number)."""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # Ask for a quick manifest refresh when forced or after an HTTP 403
        expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        # NOTE(review): `last_seq` below is read from the enclosing scope; it looks
        # unbound if one of these early returns is hit before the main loop has
        # assigned it - confirm
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        # The sequence number is taken from the "sq/<n>" component of the last fragment path
        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        # Give up once too many consecutive polls produced nothing
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                # Fall back to the manifest on the next iteration
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        # Make last_seq an exclusive upper bound for range() below
        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            # Do not request fragments older than MAX_DURATION
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Caught just below; used only to jump back to the outer while loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Keep at least FETCH_SPAN seconds between the starts of two iterations
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2425

2426

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in any of `ytcfgs`, or None when there is none."""
    lookup_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    player_url = traverse_obj(
        ytcfgs, *lookup_paths, get_all=False, expected_type=compat_str)
    if player_url:
        return urljoin('https://www.youtube.com', player_url)
    return None

2433

2434

def _download_player_url(self, video_id, fatal=False):

2435

res = self._download_webpage(

2436

'https://www.youtube.com/iframe_api',

2437

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2438

if res:

2439

player_version = self._search_regex(

2440

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2441

if player_version:

2442

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2443

2444

def _signature_cache_id(self, example_sig):
    """ Return the lengths of the dot-separated parts of example_sig, joined by dots """
    part_lengths = map(len, example_sig.split('.'))
    return '.'.join(map(compat_str, part_lengths))

2447

2448

@classmethod

2449

def _extract_player_info(cls, player_url):

2450

for player_re in cls._PLAYER_INFO_RE:

2451

id_m = re.search(player_re, player_url)

if id_m:

break

else:

raise ExtractorError('Cannot identify player %r' % player_url)

2456

return id_m.group('id')

2457

2458

def _load_player(self, video_id, player_url, fatal=True):

2459

player_id = self._extract_player_info(player_url)

2460

if player_id not in self._code_cache:

2461

code = self._download_webpage(

2462

player_url, video_id, fatal=fatal,

2463

note='Downloading player ' + player_id,

2464

errnote='Download of %s failed' % player_url)

2465

if code:

2466

self._code_cache[player_id] = code

2467

return self._code_cache.get(player_id)

2468

2469

def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a callable that descrambles signatures shaped like example_sig.

    The index permutation is read from the 'youtube-sigfuncs' filesystem cache
    when present; otherwise it is derived from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        return lambda s: ''.join(s[i] for i in cache_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None

    res = self._parse_sig_js(code)

    # Feed a string whose i-th character has code point i, so that the output
    # code points are the source indices of the permutation
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(c) for c in res(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, permutation)
    return res

2490

2491

def _print_sig_code(self, func, example_sig):
    """Print `func` as Python slicing code when the 'youtube_print_sig_code' param is set."""
    if not self.get_param('youtube_print_sig_code'):
        return

    def _genslice(start, end, step):
        stop = end + step
        starts = str(start) if start != 0 else ''
        ends = (':%d' % stop) if stop >= 0 else ':'
        steps = (':%d' % step) if step != 1 else ''
        return f's[{starts}{ends}{steps}]'

    def gen_sig_code(idxs):
        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            delta = i - prev
            if step is not None:
                if delta != step:
                    # The current run ended at prev: emit it and start afresh
                    yield _genslice(start, prev, step)
                    step = None
                continue
            if delta in [-1, 1]:
                step, start = delta, prev
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # A string whose i-th character has code point i exposes the index mapping
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

2532

2533

def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and return it as a callable.

    The function name is searched for with the patterns below (named group
    `sig`); the function is then extracted with JSInterpreter. The returned
    callable takes the scrambled signature string and passes it to the JS
    function as its only argument.
    """
    # Escaped parentheses (`\(` / `\)`) match literal parentheses of the JS
    # source. A bare `$` outside a character class is an end anchor and must
    # not stand in for them, or the pattern can never match.
    funcname_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        funcname_patterns,
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])

2556

2557

def _decrypt_signature(self, s, video_id, player_url):

2558

"""Turn the encrypted s field into a working signature"""

2559

try:

2560

player_id = (player_url, self._signature_cache_id(s))

2561

if player_id not in self._player_cache:

2562

func = self._extract_signature_function(video_id, player_url, s)

2563

self._player_cache[player_id] = func

2564

func = self._player_cache[player_id]

2565

self._print_sig_code(func, s)

2566

return func(s)

2567

except Exception as e:

2568

raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2569

2570

def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into a working signature

    Both the per-player n function and every decrypted value are memoised in
    `self._player_cache`. Raises ExtractorError when player_url is None or when
    extraction/decryption fails.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    cache = self._player_cache
    value_key = ('nsig_value', s)
    if value_key in cache:
        return cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypt = self._player_cache[func_key]
        self._player_cache[value_key] = decrypt(s)
        self.write_debug(f'Decrypted nsig {s} => {self._player_cache[value_key]}')
        return self._player_cache[value_key]
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2590

2591

def _extract_n_function_name(self, jscode):

2592

nfunc, idx = self._search_regex(

2593

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

2594

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

2595

if not idx:

2596

return nfunc

2597

return json.loads(js_to_json(self._search_regex(

2598

rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,

2599

f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2600

2601

def _extract_n_function(self, video_id, player_url):
    """
    Return a callable applying the player's n function to a single string.

    The function code is loaded from the 'youtube-nsig' filesystem cache when
    available; otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def decrypt(s):
        return jsi.extract_function_from_code(*func_code)([s])

    return decrypt

2618

2619

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value is taken from ytcfg['STS'] when available, otherwise searched for
    in the player JS. Without a player_url this raises ExtractorError if
    `fatal`, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if code:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts

2642

2643

def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URL (with `ver` and `cpn` set) so the video is marked as watched."""
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]
    cpn = ''.join(cpn_chars)

    qs['ver'] = ['2']
    qs['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(qs, True)
    playback_url = compat_urlparse.urlunparse(parsed_playback_url._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2668

2669

@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embed URLs / video ids found in `webpage`, in the order the three scans find them."""
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = [unescapeHTML(mobj.group('url')) for mobj in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(unescapeHTML(video_id) for video_id in lazy_ids)

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of groups; the video id is the last one
    entries.extend(groups[-1] for groups in matches)

    return entries

@staticmethod
def _extract_url(webpage):
    """Return the first entry found by `YoutubeIE._extract_urls`, or None when there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]

2705

2706

@classmethod
def extract_id(cls, url):
    """Return the `id` group of `_VALID_URL` (matched in verbose mode) for url; raise ExtractorError on no match."""
    mobj = re.match(cls._VALID_URL, url, flags=re.VERBOSE)
    if mobj is not None:
        return mobj.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2712

2713

def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar found in `data`."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def chapter_time(chapter):
        # The start is given in milliseconds; scale it down by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=chapter_time,
        chapter_title=chapter_title,
        duration=duration)

2727

2728

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the first non-empty chapter list built from the macro-marker engagement panels, else []."""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    # Panels are processed lazily: stop at the first one that yields chapters
    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

@staticmethod
def _extract_chapters_from_description(description, duration):
    """
    Parse `timestamp title` lines of a description into chapter dicts.

    A line is accepted only when its timestamp is non-zero, lies after the
    previously accepted start and before `duration`. The last chapter ends at
    `duration`.
    """
    timestamp_line_re = r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$'
    # The leading placeholder provides the lower bound for the first comparison
    # and is dropped from the result
    chapters = [{'start_time': 0}]
    for mobj in re.finditer(timestamp_line_re, description or ''):
        timestamp, title = mobj.groups()
        start = parse_duration(timestamp)
        if start and title and chapters[-1]['start_time'] < start < duration:
            chapters[-1]['end_time'] = start
            chapters.append({'start_time': start, 'title': title})
    chapters[-1]['end_time'] = duration
    return chapters[1:]

2758

2759

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):

2760

chapters = []

2761

last_chapter = {'start_time': 0}

2762

for idx, chapter in enumerate(chapter_list or []):

2763

title = chapter_title(chapter)

2764

start_time = chapter_time(chapter)

2765

if start_time is None:

2766

continue

2767

last_chapter['end_time'] = start_time

2768

if start_time < last_chapter['start_time']:

2769

if idx == 1:

2770

chapters.pop()

2771

self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])

2772

else:

2773

self.report_warning(f'Invalid start time for chapter "{title}"')

2774

continue

2775

last_chapter = {'start_time': start_time, 'title': title}

2776

chapters.append(last_chapter)

2777

last_chapter['end_time'] = duration

2778

return chapters

2779

2780

def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """
    Search `webpage` for `regex` and parse the result as JSON (non-fatal, lenient).

    The boundary-terminated variant of `regex` is listed before the bare one;
    '{}' is parsed when nothing is found.
    """
    patterns = (
        fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',
        regex,
    )
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False, lenient=True)

2784

2785

def _extract_comment(self, comment_renderer, parent=None):

2786

comment_id = comment_renderer.get('commentId')

if not comment_id:

return

text = self._get_text(comment_renderer, 'contentText')

2791

2792

# note: timestamp is an estimate calculated from the current time and time_text

2793

timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')

2794

author = self._get_text(comment_renderer, 'authorText')

2795

author_id = try_get(comment_renderer,

2796

lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

2797

2798

votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],

2799

lambda x: x['likeCount']), compat_str)) or 0

2800

author_thumbnail = try_get(comment_renderer,

2801

lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

2802

2803

author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)

2804

is_favorited = 'creatorHeart' in (try_get(

2805

comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})

return {

'id': comment_id,

'text': text,

'timestamp': timestamp,

2810

'time_text': time_text,

2811

'like_count': votes,

2812

'is_favorited': is_favorited,

2813

'author': author,

2814

'author_id': author_id,

2815

'author_thumbnail': author_thumbnail,

2816

'author_is_uploader': author_is_uploader,

2817

'parent': parent or 'root'

2818

}

2819

2820

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator of comment dicts for a comment section or a reply thread.

    The method calls itself for reply threads, passing the parent comment id
    as `parent` and sharing the same `tracker` dict of running counters.

    @param root_continuation_data  Renderer the first continuation is read from
    @param ytcfg                   Config used to build the API request headers
    @param video_id                Video the comments belong to
    @param parent                  Id of the parent comment (None at the top level)
    @param tracker                 Counter dict shared across recursive calls
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Read the comments header; returns the continuation for the chosen sort order"""
        header_continuation = None
        for content in contents:
            header = traverse_obj(content, 'commentsHeaderRenderer')
            estimated_total = self._get_count(header, 'countText', 'commentsCount')

            if estimated_total:
                tracker['est_total'] = estimated_total
                self.to_screen(f'Downloading ~{estimated_total} comments')

            # 1 = new, 0 = top
            sort_index = int(get_single_config_arg('comment_sort') != 'top')
            menu_item = try_get(
                header,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][sort_index],
                dict) or {}
            endpoint = menu_item.get('serviceEndpoint') or {}

            header_continuation = (
                self._extract_continuation_ep_data(endpoint)
                or self._extract_continuation(menu_item))
            if not header_continuation:
                continue

            sort_text = str_or_none(menu_item.get('title')) or (
                'top comments' if sort_index == 0 else 'newest first')
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return header_continuation

    def extract_thread(contents):
        """Yield the comments of one page; a bare `yield` (None) tells the caller to stop"""
        is_top_level = not parent
        if is_top_level:
            tracker['current_page_thread'] = 0
        for content in contents:
            if is_top_level and tracker['total_parent_comments'] >= max_parents:
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            reply_iter = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            remaining_replies = max(0, max_replies - tracker['total_reply_comments'])
            yield from itertools.islice(
                reply_iter, min(max_replies_per_thread, remaining_replies))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Up to four limits are read from `max_comments`; missing ones become sys.maxsize.
    # The first value is unpacked but not used in this method
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        sections = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for section in sections:
            items = traverse_obj(
                section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section of the first response is only inspected for the header
                continuation = extract_header(items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

2965

2966

@staticmethod

2967

def _generate_comment_continuation(video_id):

2968

"""

2969

Generates initial comment section continuation token from given video id

2970

"""

2971

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

2972

return base64.b64encode(token.encode()).decode()

2973

2974

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments of the item section whose
    `sectionIdentifier` is 'comment-item-section', capped at the first
    value of the `max_comments` extractor argument (no cap if it is not
    an integer). `webpage` is accepted but not used here.
    """
    def _real_comment_extract(section_list):
        comment_section = None
        for section in traverse_obj(section_list, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, limit)

2984

2985

@staticmethod

2986

def _get_checkok_params():

2987

return {'contentCheckOk': True, 'racyCheckOk': True}

2988

2989

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback context part of a player request.

    @param sts  Signature timestamp; only included when it is not None
    @returns    Dict with `playbackContext` plus the check-ok flags
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context

@staticmethod
def _is_agegated(player_response):
    """Tell whether a player response describes an age-gated video.

    A response counts as age-gated when it carries a
    `desktopLegacyAgeGateReason`, or when the playability `status` or
    `reason` contains one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lower-case, whereas status codes are compared in upper
    # case elsewhere in this file (e.g. 'UNPLAYABLE'), so match case-insensitively.
    # Non-string values cannot contain a marker and are ignored
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)

3014

3015

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3018

3019

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the player response of `video_id` using the given client.

    The signature timestamp is only looked up when `player_url` is given.
    Returns the response, or None when the response is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB',  # enable stories
        **self._generate_player_context(sts),
    }

    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None

3038

3039

def _get_requested_clients(self, url, smuggled_data):
    """Work out which player clients to query, in order and without duplicates.

    Clients come from the `player_client` extractor argument. 'default'
    expands to the default clients and 'all' to every client whose name
    does not start with an underscore, ordered by descending priority.
    Unknown names are skipped with a warning. For music URLs the existing
    `<client>_music` variants of the chosen clients are appended.
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Compute the variants from a snapshot first: extending a list from a
        # generator that iterates the same list would also visit the new entries
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3062

3063

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses from every requested client.

    Extra clients may be queued while iterating when a response is
    unplayable or age-gated. Errors are deferred: the last one is raised
    only if no response at all was obtained, otherwise it is reported as
    a warning.

    @returns  Tuple of (list of player responses, player URL or None)
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    all_clients = set(clients)
    # Reversed so that clients can be consumed in order with pop()
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS and actual_client not in all_clients:
                clients.append(client_name)
                all_clients.add(actual_client)
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; older ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3138

3139

def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Generator of format dicts built from the given streaming data.

    Formats listed directly in `formats`/`adaptiveFormats` are yielded
    first, followed by formats from the HLS and DASH manifests unless those
    are skipped through the `skip` extractor argument or the
    `youtube_include_*_manifest` params.

    @param streaming_data  List of `streamingData` dicts
    @param video_id        Used for messages and signature decryption
    @param player_url      Player URL used to decrypt signatures; may be None
    @param is_live         Whether the video is currently live
    @param duration        Video duration, used to detect damaged formats; may be None
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `audioQuality` may be absent or explicitly None
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: rebuild it from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # `quality` is None when the format declares no quality at all
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Copied because entries are appended below and the returned list
    # may be shared with the caller's configuration
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Set the id and quality of a manifest format; returns False for duplicates"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen through another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Generator of storyboard formats described by the player responses.

    The storyboard spec is a '|'-separated string: the first part is the
    base URL and every following part is a '#'-separated description of one
    storyboard level. Malformed levels are reported and skipped.

    @param player_responses  Player responses to read the spec from
    @param duration          Video duration; storyboards are skipped when it is None
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() takes the first part of the spec
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        if duration is None:
            # Fragment durations are computed from the video duration;
            # dividing None would raise TypeError and abort the extraction
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # May be fractional: the last image need not be completely filled
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3349

3350

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    The webpage download is skipped when 'webpage' is listed in the
    `player_skip` extractor argument, and it is not fatal when it fails.

    @returns  Tuple of (webpage, master_ytcfg, player_responses, player_url)
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None if skip_webpage else self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3363

3364

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Work out the live status and extract all formats.

    `isLive` from the video details takes precedence; `isLiveNow` from the
    live broadcast details is only consulted when the former is None.

    @returns  Tuple of (live_broadcast_details, is_live, streaming_data, formats)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    format_iter = self._extract_formats(streaming_data, video_id, player_url, is_live, duration)
    formats = list(format_iter)

    return live_broadcast_details, is_live, streaming_data, formats

3374

3375

def _real_extract(self, url):

3376

url, smuggled_data = unsmuggle_url(url, {})

3377

video_id = self._match_id(url)

3378

3379

base_url = self.http_scheme() + '//www.youtube.com/'

3380

webpage_url = base_url + 'watch?v=' + video_id

3381

3382

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3383

3384

playability_statuses = traverse_obj(

3385

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3386

3387

trailer_video_id = get_first(

3388

playability_statuses,

3389

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3390

expected_type=str)

3391

if trailer_video_id:

3392

return self.url_result(

3393

trailer_video_id, self.ie_key(), trailer_video_id)

3394

3395

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3396

if webpage else (lambda x: None))

3397

3398

video_details = traverse_obj(

3399

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3400

microformats = traverse_obj(

3401

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3402

expected_type=dict, default=[])

3403

video_title = (

3404

get_first(video_details, 'title')

3405

or self._get_text(microformats, (..., 'title'))

3406

or search_meta(['og:title', 'twitter:title', 'title']))

3407

video_description = get_first(video_details, 'shortDescription')

3408

3409

multifeed_metadata_list = get_first(

3410

player_responses,

3411

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3412

expected_type=str)

3413

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3414

if self.get_param('noplaylist'):

3415

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3420

# Unquote should take place before split on comma (,) since textual

3421

# fields may contain comma as well (see

3422

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3423

feed_data = compat_parse_qs(

3424

compat_urllib_parse_unquote_plus(feed))

3425

3426

def feed_entry(name):

3427

return try_get(

3428

feed_data, lambda x: x[name][0], compat_str)

3429

3430

feed_id = feed_entry('id')

3431

if not feed_id:

3432

continue

3433

feed_title = feed_entry('title')

3434

title = video_title

3435

if feed_title:

3436

title += ' (%s)' % feed_title

3437

entries.append({

3438

'_type': 'url_transparent',

3439

'ie_key': 'Youtube',

3440

'url': smuggle_url(

3441

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3442

{'force_singlefeed': True}),

3443

'title': title,

3444

})

3445

feed_ids.append(feed_id)

3446

self.to_screen(

3447

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3448

% (', '.join(feed_ids), video_id))

3449

return self.playlist_result(

3450

entries, video_id, video_title, video_description)

3451

3452

duration = int_or_none(

3453

get_first(video_details, 'lengthSeconds')

3454

or get_first(microformats, 'lengthSeconds')

3455

or parse_duration(search_meta('duration'))) or None

3456

3457

if get_first(video_details, 'isPostLiveDvr'):

3458

self.write_debug('Video is in Post-Live Manifestless mode')

3459

if duration or 0 > 4 * 3600:

3460

self.report_warning(

3461

'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '

3462

'This is a known issue and patches are welcome')

3463

3464

live_broadcast_details, is_live, streaming_data, formats = self._list_formats(

3465

video_id, microformats, video_details, player_responses, player_url, duration)

3466

3467

if not formats:

3468

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3469

self.report_drm(video_id)

3470

pemr = get_first(

3471

playability_statuses,

3472

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3473

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3474

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3475

if subreason:

3476

if subreason == 'The uploader has not made this video available in your country.':

3477

countries = get_first(microformats, 'availableCountries')

3478

if not countries:

3479

regions_allowed = search_meta('regionsAllowed')

3480

countries = regions_allowed.split(',') if regions_allowed else None

3481

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3482

reason += f'. {subreason}'

3483

if reason:

3484

self.raise_no_formats(reason, expected=True)

3485

3486

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3487

if not keywords and webpage:

3488

keywords = [

3489

unescapeHTML(m.group('content'))

3490

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3491

for keyword in keywords:

3492

if keyword.startswith('yt:stretch='):

3493

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3494

if mobj:

3495

# NB: float is intentional for forcing float division

3496

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3501

f['stretched_ratio'] = ratio

3502

break

3503

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3504

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3505

if thumbnail_url:

3506

thumbnails.append({

3507

'url': thumbnail_url,

3508

})

3509

original_thumbnails = thumbnails.copy()

3510

3511

# The best resolution thumbnails sometimes does not appear in the webpage

3512

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3513

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3514

thumbnail_names = [

3515

# While the *1,*2,*3 thumbnails are just below their correspnding "*default" variants

3516

# in resolution, these are not the custom thumbnail. So de-prioritize them

3517

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3518

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3519

]

3520

n_thumbnail_names = len(thumbnail_names)

3521

thumbnails.extend({

3522

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3523

video_id=video_id, name=name, ext=ext,

3524

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3525

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3526

for thumb in thumbnails:

3527

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3528

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3529

self._remove_duplicate_formats(thumbnails)

3530

self._downloader._sort_thumbnails(original_thumbnails)

3531

3532

category = get_first(microformats, 'category') or search_meta('genre')

3533

channel_id = str_or_none(

3534

get_first(video_details, 'channelId')

3535

or get_first(microformats, 'externalChannelId')

3536

or search_meta('channelId'))

3537

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3538

3539

live_content = get_first(video_details, 'isLiveContent')

3540

is_upcoming = get_first(video_details, 'isUpcoming')

3541

if is_live is None:

3542

if is_upcoming or live_content is False:

3543

is_live = False

3544

if is_upcoming is None and (live_content or is_live):

3545

is_upcoming = False

3546

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3547

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3548

if not duration and live_end_time and live_start_time:

3549

duration = live_end_time - live_start_time

3550

3551

if is_live and self.get_param('live_from_start'):

3552

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3553

3554

formats.extend(self._extract_storyboard(player_responses, duration))

3555

3556

# Source is given priority since formats that throttle are given lower source_preference

3557

# When throttling issue is fully fixed, remove this

3558

self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3563

'formats': formats,

3564

'thumbnails': thumbnails,

3565

# The best thumbnail that we are sure exists. Prevents unnecessary

3566

# URL checking if user don't care about getting the best possible thumbnail

3567

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3568

'description': video_description,

3569

'uploader': get_first(video_details, 'author'),

3570

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3571

'uploader_url': owner_profile_url,

3572

'channel_id': channel_id,

3573

'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),

3574

'duration': duration,

3575

'view_count': int_or_none(

3576

get_first((video_details, microformats), (..., 'viewCount'))

3577

or search_meta('interactionCount')),

3578

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3579

'age_limit': 18 if (

3580

get_first(microformats, 'isFamilySafe') is False

3581

or search_meta('isFamilyFriendly') == 'false'

3582

or search_meta('og:restrictions:age') == '18+') else 0,

3583

'webpage_url': webpage_url,

3584

'categories': [category] if category else None,

3585

'tags': keywords,

3586

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3587

'is_live': is_live,

3588

'was_live': (False if is_live or is_upcoming or live_content is False

3589

else None if is_live is None or is_upcoming is None

3590

else live_content),

3591

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3592

'release_timestamp': live_start_time,

3593

}

3594

3595

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3596

if pctr:

3597

def get_lang_code(track):

3598

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3599

or track.get('languageCode'))

3600

3601

# Converted into dicts to remove duplicates

3602

captions = {

3603

get_lang_code(sub): sub

3604

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3605

translation_languages = {

3606

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3607

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3608

3609

def process_language(container, base_url, lang_code, sub_name, query):

3610

lang_subs = container.setdefault(lang_code, [])

3611

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

subtitles, automatic_captions = {}, {}

3622

for lang_code, caption_track in captions.items():

3623

base_url = caption_track.get('baseUrl')

3624

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3625

if not base_url:

3626

continue

3627

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3628

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3633

if not caption_track.get('isTranslatable'):

3634

continue

3635

for trans_code, trans_name in translation_languages.items():

3636

if not trans_code:

3637

continue

3638

orig_trans_code = trans_code

3639

if caption_track.get('kind') != 'asr':

3640

if 'translated_subs' in self._configuration_arg('skip'):

3641

continue

3642

trans_code += f'-{lang_code}'

3643

trans_name += format_field(lang_name, template=' from %s')

3644

# Add an "-orig" label to the original language so that it can be distinguished.

3645

# The subs are returned without "-orig" as well for compatibility

3646

if lang_code == f'a-{orig_trans_code}':

3647

process_language(

3648

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3649

# Setting tlang=lang returns damaged subtitles.

3650

process_language(automatic_captions, base_url, trans_code, trans_name,

3651

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3652

info['automatic_captions'] = automatic_captions

3653

info['subtitles'] = subtitles

3654

3655

parsed_url = compat_urllib_parse_urlparse(url)

3656

for component in [parsed_url.fragment, parsed_url.query]:

3657

query = compat_parse_qs(component)

3658

for k, v in query.items():

3659

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3660

d_k += '_time'

3661

if d_k not in info and k in s_ks:

3662

info[d_k] = parse_duration(query[k][0])

3663

3664

# Youtube Music Auto-generated description

3665

if video_description:

3666

mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)

3667

if mobj:

3668

release_year = mobj.group('release_year')

3669

release_date = mobj.group('release_date')

3670

if release_date:

3671

release_date = release_date.replace('-', '')

3672

if not release_year:

3673

release_year = release_date[:4]

3674

info.update({

3675

'album': mobj.group('album'.strip()),

3676

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3677

'track': mobj.group('track').strip(),

3678

'release_date': release_date,

3679

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self._extract_yt_initial_variable(

3685

webpage, self._YT_INITIAL_DATA_RE, video_id,

3686

'yt initial data')

3687

if not initial_data:

3688

query = {'videoId': video_id}

3689

query.update(self._get_checkok_params())

3690

initial_data = self._extract_response(

3691

item_id=video_id, ep='next', fatal=False,

3692

ytcfg=master_ytcfg, query=query,

3693

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3694

note='Downloading initial data API JSON')

3695

3696

try: # This will error if there is no livechat

3697

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3698

except (KeyError, IndexError, TypeError):

3699

pass

3700

else:

3701

info.setdefault('subtitles', {})['live_chat'] = [{

3702

'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies

3703

'video_id': video_id,

3704

'ext': 'json',

3705

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3711

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3712

or self._extract_chapters_from_description(video_description, duration)

3713

or None)

3714

3715

contents = traverse_obj(

3716

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3717

expected_type=list, default=[])

3718

3719

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3720

if vpir:

3721

stl = vpir.get('superTitleLink')

3722

if stl:

3723

stl = self._get_text(stl)

3724

if try_get(

3725

vpir,

3726

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3727

info['location'] = stl

3728

else:

3729

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

3730

if mobj:

3731

info.update({

3732

'series': mobj.group(1),

3733

'season_number': int(mobj.group(2)),

3734

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3739

list) or []):

3740

tbr = tlb.get('toggleButtonRenderer') or {}

3741

for getter, regex in [(

3742

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3743

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3744

lambda x: x['accessibility'],

3745

lambda x: x['accessibilityData']['accessibilityData'],

3746

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3747

label = (try_get(tbr, getter, dict) or {}).get('label')

3748

if label:

3749

mobj = re.match(regex, label)

3750

if mobj:

3751

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3752

break

3753

sbr_tooltip = try_get(

3754

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3755

if sbr_tooltip:

3756

like_count, dislike_count = sbr_tooltip.split(' / ')

3757

info.update({

3758

'like_count': str_to_int(like_count),

3759

'dislike_count': str_to_int(dislike_count),

3760

})

3761

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3762

if vsir:

3763

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3764

info.update({

3765

'channel': self._get_text(vor, 'title'),

3766

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3771

list) or []

3772

multiple_songs = False

3773

for row in rows:

3774

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3775

multiple_songs = True

3776

break

3777

for row in rows:

3778

mrr = row.get('metadataRowRenderer') or {}

3779

mrr_title = mrr.get('title')

3780

if not mrr_title:

3781

continue

3782

mrr_title = self._get_text(mrr, 'title')

3783

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3784

if mrr_title == 'License':

3785

info['license'] = mrr_contents_text

3786

elif not multiple_songs:

3787

if mrr_title == 'Album':

3788

info['album'] = mrr_contents_text

3789

elif mrr_title == 'Artist':

3790

info['artist'] = mrr_contents_text

3791

elif mrr_title == 'Song':

3792

info['track'] = mrr_contents_text

3793

3794

fallbacks = {

3795

'channel': 'uploader',

3796

'channel_id': 'uploader_id',

3797

'channel_url': 'uploader_url',

3798

}

3799

3800

# The upload date for scheduled, live and past live streams / premieres in microformats

3801

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3802

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3803

upload_date = (

3804

unified_strdate(get_first(microformats, 'uploadDate'))

3805

or unified_strdate(search_meta('uploadDate')))

3806

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3807

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date

3808

info['upload_date'] = upload_date

3809

3810

for to, frm in fallbacks.items():

3811

if not info.get(to):

3812

info[to] = info.get(frm)

3813

3814

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3820

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3821

is_membersonly = None

3822

is_premium = None

3823

if initial_data and is_private is not None:

3824

is_membersonly = False

3825

is_premium = False

3826

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3827

badge_labels = set()

3828

for content in contents:

3829

if not isinstance(content, dict):

3830

continue

3831

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3832

for badge_label in badge_labels:

3833

if badge_label.lower() == 'members only':

3834

is_membersonly = True

3835

elif badge_label.lower() == 'premium':

3836

is_premium = True

3837

elif badge_label.lower() == 'unlisted':

3838

is_unlisted = True

3839

3840

info['availability'] = self._availability(

3841

is_private=is_private,

3842

needs_premium=is_premium,

3843

needs_subscription=is_membersonly,

3844

needs_auth=info['age_limit'] >= 18,

3845

is_unlisted=None if is_private is None else is_unlisted)

3846

3847

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3848

3849

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3855

3856

@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method taking ``(self, url, smuggled_data)``.

    The returned wrapper takes ``(self, url)``. It unsmuggles the URL, flags
    music URLs in the smuggled data, calls ``func`` and, when any smuggled
    data is present, re-smuggles it into the URL of every returned entry.
    """

    def _resmuggle(entries, payload):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        entries = result.get('entries')
        if entries:
            # Entries are wrapped lazily; nothing is consumed here
            result['entries'] = _resmuggle(entries, smuggled_data)
        return result

    return wrapper

def _extract_channel_id(self, webpage):
    """Return the channel ID found in the meta tags of ``webpage``.

    The ``channelId`` meta tag is preferred. Otherwise the channel URL is
    read from one of the Open Graph / app-link / Twitter URL meta tags and
    the ID is taken from its ``/channel/<id>`` path component.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to sit inside the group: a repeated group only
    # keeps its last repetition, which would be one character of the ID.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')

3888

3889

@staticmethod

3890

def _extract_basic_item_renderer(item):

3891

# Modified from _extract_grid_item_renderer

3892

known_basic_renderers = (

3893

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

3894

)

3895

for key, renderer in item.items():

3896

if not isinstance(renderer, dict):

3897

continue

3898

elif key in known_basic_renderers:

3899

return renderer

3900

elif key.startswith('grid') and key.endswith('Renderer'):

3901

return renderer

3902

3903

def _grid_entries(self, grid_renderer):
    """Yield one result per recognisable item of ``grid_renderer['items']``.

    Non-dict items and items without a basic renderer are skipped. For each
    remaining item the first match wins: ``playlistId``, ``videoId``,
    ``channelId``, then the navigation endpoint URL if one of the
    tab/playlist/video extractors accepts it.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(renderer)
        elif channel_id:
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url), video_title=title)

3942

3943

def _music_reponsive_list_entry(self, renderer):
    """Turn a music list item renderer into a music.youtube.com URL result.

    Checked in order: the item's own video ID, the watch endpoint's playlist
    (optionally combined with its video ID), then the browse endpoint ID.
    Returns None when none of them is present.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={item_video_id}',
            ie=YoutubeIE.ie_key(), video_id=item_video_id)

    watch_path = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, watch_path + ('playlistId',))
    if playlist_id:
        watch_video_id = traverse_obj(renderer, watch_path + ('videoId',))
        if watch_video_id:
            target = f'https://music.youtube.com/watch?v={watch_video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None

3960

3961

def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the grid entries embedded in ``shelf_renderer['content']``.

    Only ``gridRenderer`` and ``expandedShelfContentsRenderer`` content is
    handled; anything else yields nothing.
    """
    content = shelf_renderer.get('content')
    if isinstance(content, dict):
        grid = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
        if grid:
            # TODO: add support for nested playlists so each shelf is processed
            # as separate playlist
            # TODO: this includes only first N items
            for entry in self._grid_entries(grid):
                yield entry
        # TODO: 'horizontalListRenderer' content is not handled yet

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's own page, then its inline entries.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3991

3992

def _playlist_entries(self, video_list_renderer):
    """Yield an extracted video for each playlist video renderer.

    Iterates ``video_list_renderer['contents']`` and skips anything that is
    not a dict, has no playlist video renderer, or lacks a ``videoId``.
    """
    for content in video_list_renderer['contents']:
        if isinstance(content, dict):
            renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
            if isinstance(renderer, dict) and renderer.get('videoId'):
                yield self._extract_video(renderer)

4003

4004

def _rich_entries(self, rich_grid_renderer):
    """Yield the video at ``content.videoRenderer`` if it has a video ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4011

4012

def _video_entry(self, video_renderer):
    """Return the extracted video, or None when there is no ``videoId``."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)

4016

4017

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab URL result for a hashtag tile, or None without a URL."""
    relative_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', relative_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4023

4024

def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a backstage post.

    In order: the attached video, the attached playlist, then every video
    URL linked in the post text, except a link to the attached video itself.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        linked_id = YoutubeIE._match_id(ep_url)
        # The attached video was already yielded above
        if linked_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=linked_id)

4059

4060

def _post_thread_continuation_entries(self, post_thread_continuation):

4061

contents = post_thread_continuation.get('contents')

4062

if not isinstance(contents, list):

4063

return

4064

for content in contents:

4065

renderer = content.get('backstagePostThreadRenderer')

4066

if isinstance(renderer, dict):

4067

yield from self._post_thread_entries(renderer)

4068

continue

4069

renderer = content.get('videoRenderer')

4070

if isinstance(renderer, dict):

4071

yield self._video_entry(renderer)

4072

4073

r''' # unused

4074

def _rich_grid_entries(self, contents):

4075

for content in contents:

4076

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4077

if video_renderer:

4078

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    ``continuation_list`` acts as an output parameter: it is reset in place
    to ``[None]`` and its single slot receives whatever
    ``self._extract_continuation`` returns for the renderers visited,
    falling back to the section renderer and finally to ``parent_renderer``.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Renderer key -> callable returning an iterable of entries
    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for isr_content in try_get(is_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(isr_content, dict):
                continue
            # Only the first known renderer of each item is processed
            found = next(
                ((name, value) for name, value in isr_content.items() if name in handlers),
                None)
            if found is None:
                continue
            name, renderer = found
            for entry in handlers[name](renderer):
                if entry:
                    yield entry
            continuation_list[0] = self._extract_continuation(renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4131

4132

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of ``tab``, following continuations page by page.

    The tab's initial content is processed first. Then continuation
    responses are requested until no continuation token is left, a request
    returns nothing, or a response contains no known renderer.
    """
    continuation_list = [None]
    # The lambda resolves continuation_list at call time, so the rebinding
    # done before each dispatch below is visible to it.
    extract_entries = lambda x: self._extract_entries(x, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict)
        or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Key in response['continuationContents'] -> entry generator
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Key of the first continuation item -> (entry generator, list key it expects)
    item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        found = next(
            ((name, value) for name, value in continuation_contents.items()
             if name in continuation_handlers),
            None)
        continuation_renderer = None
        if found:
            name, continuation_renderer = found
            continuation_list = [None]
            yield from continuation_handlers[name](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
        # An empty renderer falls through to the item-based lookup below
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        item_key = next((name for name in continuation_item if name in item_handlers), None)
        if item_key is None:
            break
        handler, list_key = item_handlers[item_key]
        video_items_renderer = {list_key: continuation_items}
        continuation_list = [None]
        yield from handler(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the renderer of the first tab whose ``selected`` is True.

    When no tab is selected, raise ExtractorError if ``fatal`` is set and
    return None otherwise.
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next((r for r in renderers if r.get('selected') is True), None)
    if selected is None and fatal:
        raise ExtractorError('Unable to find selected tab')
    return selected

4216

4217

def _extract_uploader(self, data):
    """Return uploader name/ID/URL taken from the secondary sidebar owner.

    Only keys with a non-None value are included; the result is empty when
    the sidebar has no video owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    found = {
        # Strips a "by <name> and N others" wrapper; otherwise the raw text is used
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {key: value for key, value in found.items() if value is not None}

4232

4233

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a tabbed page.

    Metadata is read from ``channelMetadataRenderer`` or, when that is
    absent, ``playlistMetadataRenderer``. Thumbnails combine the sidebar
    thumbnails, avatars and banners. Entries come from ``self._entries``
    for the selected tab.
    """
    playlist_id = None
    title = None
    description = None
    channel_url = None
    channel_name = None
    channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Stays None for playlist metadata, where no channel ID was read
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        base = (url or '').split('=')[0]
        return url_or_none(base + '=s0')

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1
            })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5
            })

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Append the tab's title and expanded text to the playlist title
    for field in ('title', 'expandedText'):
        title += format_field(selected_tab, field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # Mirror the (possibly updated) uploader fields into the channel fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4324

4325

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline playlist, paging via the ``next`` API.

    Each page is read with ``_playlist_entries``; videos up to and including
    the last one already yielded are skipped. Iteration stops when a page
    has no playlist, no videos, or nothing new.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        # A response without the expected playlist structure ends the paging
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The watch endpoint may be missing; the query below then relies
        # on its fallbacks instead of crashing on None.get()
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

4355

4356

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist dict found in a response.

    Returns a `url_result` pointing at the playlist's own web URL (handled by
    YoutubeTabIE) when that URL exists, differs from `url` and the playlist id is
    not one of the known unviewable kinds; otherwise returns a `playlist_result`
    whose entries come from `_extract_inline_playlist`.

    @param item_id:  fallback id when the playlist dict has no 'playlistId'
    @param url:      URL being extracted; base for resolving the playlist URL
    @param data:     response used as a fallback source for the title
    @param playlist: playlist dict
    @param ytcfg:    passed through to `_extract_inline_playlist`
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    web_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, web_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4378

4379

def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its sidebar.

    Labels are collected from the badges of the 'playlistSidebarPrimaryInfoRenderer'
    and, if present, from the selected entry of its privacy dropdown. Only the
    labels 'unlisted', 'private' and 'public' are acted on; without one of them the
    private/unlisted flags stay None (nothing is assumed to be public).

    @param data: response
    @returns:    result of `_availability` for the detected flags
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        renderer,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    # Lazily walk the selected entries and take the first one that has a label
    selected_labels = (
        self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
        for entry in dropdown_entries
        if try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool))
    chosen_label = next(filter(None, selected_labels), None)
    if chosen_label:
        badge_labels.add(chosen_label.lower())

    is_private = is_unlisted = None
    for badge in badge_labels:
        if badge == 'public':
            is_unlisted = is_private = False
        elif badge == 'private':
            is_private = True
        elif badge == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)

4410

4411

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value among the playlist sidebar items.

    @param data:          response containing 'sidebar' -> 'playlistSidebarRenderer' -> 'items'
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the value must have to be accepted
    @returns:             the matching value, or None if no item provides one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with unavailable videos included.

    The browse id and params are taken from the sidebar menu item labelled
    'show unavailable videos' when it is found; otherwise the defaults
    'VL<item_id>' and 'wgYCCAA=' are used. Returns None without any request when
    the response has no 'playlistSidebarPrimaryInfoRenderer'.

    @param item_id: playlist id, used for the default browse id and progress note
    @param data:    response whose sidebar is inspected
    @param ytcfg:   ytcfg used when generating the API headers
    @returns:       the non-fatal `_extract_response` result, or None
    """
    sidebar_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar_info:
        return None

    browse_endpoint = {}
    menu_items = try_get(
        sidebar_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_item = entry.get('menuNavigationItemRenderer')
        label = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4455

4456

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE (cached property)"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped

4459

4460

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and parse its initial data, retrying on failure.

    Up to 'extractor_retries' (default 3) retries are made after network errors
    other than HTTP 403/429 and after responses whose initial data has none of
    'contents', 'currentVideoEndpoint' or 'onResponseReceivedActions'.

    @param url:     page to download
    @param item_id: id used for progress notes
    @param fatal:   raise on failure instead of only reporting a warning
    @returns:       (webpage, data) as obtained by the last attempt
    """
    max_retries = self.get_param('extractor_retries', 3)
    attempt = -1
    webpage = data = last_error = None
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            webpage = self._download_webpage(
                url, item_id, note=f'Downloading webpage{retry_suffix}')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            is_network_error = isinstance(cause, network_exceptions)
            is_blocked = isinstance(cause, compat_HTTPError) and cause.code in (403, 429)
            if is_network_error and not is_blocked:
                last_error = error_to_compat_str(cause or e.msg)
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Only reached when the download and parsing above succeeded:
        # every path through the handler above continues, raises or breaks
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt >= max_retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):

4507

"""Use if failed to extract ytcfg (and data) from initial webpage"""

4508

if not ytcfg and self.is_authenticated:

4509

msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'

4510

if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:

4511

raise ExtractorError(

4512

f'{msg}. If you are not downloading private content, or '

4513

'your cookies are only for the first account and channel,'

4514

' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',

4515

expected=True)

4516

self.report_warning(msg, only_once=True)

4517

4518

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the response data and ytcfg for a tab/playlist URL.

    Unless `skip_webpage` is set, the webpage is downloaded first and `ytcfg` is
    extracted from it if none was given. If no data was obtained that way, the
    data is fetched through `_extract_tab_endpoint` instead.

    @param url:            URL to extract
    @param item_id:        id used for progress notes
    @param ytcfg:          ytcfg to use; extracted from the webpage when falsy
    @param fatal:          raise on home-page redirect / API failure instead of warning
    @param webpage_fatal:  `fatal` value used for the webpage download
    @param default_client: client passed on to `_extract_tab_endpoint`
    @returns:              (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4537

4538

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' endpoint and fetch its data.

    The resolved endpoint is queried via 'browse' (for a 'browseEndpoint') or
    'next' (for a 'watchEndpoint'), checked in that order.

    @param url:            URL to resolve
    @param item_id:        id used for progress notes and the warning
    @param ytcfg:          ytcfg used for headers and requests
    @param fatal:          raise ExtractorError instead of warning on failure
    @param default_client: client used for headers and requests
    @returns:              the API response, or None if the URL could not be resolved
                           and `fatal` is false
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    api_by_endpoint = (('browseEndpoint', 'browse'), ('watchEndpoint', 'next'))
    for endpoint_key, api_ep in api_by_endpoint:
        endpoint_params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)

4555

4556

# Value `_search_results` sends as the search request's 'params' field when it is
# called without an explicit `params` argument; a falsy value means none is sent.
_SEARCH_PARAMS = None

4557

4558

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, requesting result pages until no continuation is left.

    @param query:          search query
    @param params:         value for the request's 'params' field; defaults to
                           `_SEARCH_PARAMS`, and is omitted from the request when falsy
    @param default_client: client used for the ytcfg download, headers and requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request_data = {'query': query}
    if params:
        request_data['params'] = params

    # Paths at which a search response may carry its result items
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-element list that `_extract_entries` receives alongside each page;
    # its first item is merged into the next request
    continuation_list = [None]
    response = None
    for page_num in itertools.count(1):
        request_data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response), default_client=default_client)
        response = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=request_data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        page_contents = traverse_obj(response, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4593

IE_DESC = 'YouTube Tabs'

4594

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4603

(?P<not_channel>

4604

feed/|hashtag/|

4605

(?:playlist|watch)\?.*?\blist=

4606

)|

4607

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4612

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4613

}

4614

IE_NAME = 'youtube:tab'

4615

4616

_TESTS = [{

4617

'note': 'playlists, multipage',

4618

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4619

'playlist_mincount': 94,

4620

'info_dict': {

4621

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4622

'title': 'Igor Kleiner - Playlists',

4623

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4624

'uploader': 'Igor Kleiner',

4625

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4626

'channel': 'Igor Kleiner',

4627

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4628

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4629

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4630

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4631

'channel_follower_count': int

4632

},

4633

}, {

4634

'note': 'playlists, multipage, different order',

4635

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4636

'playlist_mincount': 94,

4637

'info_dict': {

4638

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4639

'title': 'Igor Kleiner - Playlists',

4640

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4641

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4642

'uploader': 'Igor Kleiner',

4643

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4644

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4645

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4646

'channel': 'Igor Kleiner',

4647

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4648

'channel_follower_count': int

4649

},

4650

}, {

4651

'note': 'playlists, series',

4652

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4653

'playlist_mincount': 5,

4654

'info_dict': {

4655

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4656

'title': '3Blue1Brown - Playlists',

4657

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4658

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4659

'uploader': '3Blue1Brown',

4660

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4661

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4662

'channel': '3Blue1Brown',

4663

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4664

'tags': ['Mathematics'],

4665

'channel_follower_count': int

4666

},

4667

}, {

4668

'note': 'playlists, singlepage',

4669

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4670

'playlist_mincount': 4,

4671

'info_dict': {

4672

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4673

'title': 'ThirstForScience - Playlists',

4674

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4675

'uploader': 'ThirstForScience',

4676

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4677

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4678

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4679

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4680

'tags': 'count:13',

4681

'channel': 'ThirstForScience',

4682

'channel_follower_count': int

4683

}

4684

}, {

4685

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4686

'only_matching': True,

4687

}, {

4688

'note': 'basic, single video playlist',

4689

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4690

'info_dict': {

4691

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4692

'uploader': 'Sergey M.',

4693

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4694

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4699

'channel': 'Sergey M.',

4700

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4701

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4702

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4707

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4708

'info_dict': {

4709

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4710

'uploader': 'Sergey M.',

4711

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4712

'title': 'youtube-dl empty playlist',

4713

'tags': [],

4714

'channel': 'Sergey M.',

4715

'description': '',

4716

'modified_date': '20160902',

4717

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4718

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4719

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4725

'info_dict': {

4726

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4727

'title': 'lex will - Home',

4728

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4729

'uploader': 'lex will',

4730

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4731

'channel': 'lex will',

4732

'tags': ['bible', 'history', 'prophesy'],

4733

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4734

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4735

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4736

'channel_follower_count': int

4737

},

4738

'playlist_mincount': 2,

4739

}, {

4740

'note': 'Videos tab',

4741

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4742

'info_dict': {

4743

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4744

'title': 'lex will - Videos',

4745

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4746

'uploader': 'lex will',

4747

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4748

'tags': ['bible', 'history', 'prophesy'],

4749

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4750

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4751

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4752

'channel': 'lex will',

4753

'channel_follower_count': int

4754

},

4755

'playlist_mincount': 975,

4756

}, {

4757

'note': 'Videos tab, sorted by popular',

4758

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4759

'info_dict': {

4760

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4761

'title': 'lex will - Videos',

4762

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4763

'uploader': 'lex will',

4764

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4765

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4766

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4767

'channel': 'lex will',

4768

'tags': ['bible', 'history', 'prophesy'],

4769

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4770

'channel_follower_count': int

4771

},

4772

'playlist_mincount': 199,

4773

}, {

4774

'note': 'Playlists tab',

4775

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4776

'info_dict': {

4777

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4778

'title': 'lex will - Playlists',

4779

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4780

'uploader': 'lex will',

4781

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4782

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4783

'channel': 'lex will',

4784

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4785

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4786

'tags': ['bible', 'history', 'prophesy'],

4787

'channel_follower_count': int

4788

},

4789

'playlist_mincount': 17,

4790

}, {

4791

'note': 'Community tab',

4792

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4793

'info_dict': {

4794

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4795

'title': 'lex will - Community',

4796

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4797

'uploader': 'lex will',

4798

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4799

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4800

'channel': 'lex will',

4801

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4802

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4803

'tags': ['bible', 'history', 'prophesy'],

4804

'channel_follower_count': int

4805

},

4806

'playlist_mincount': 18,

4807

}, {

4808

'note': 'Channels tab',

4809

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4810

'info_dict': {

4811

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4812

'title': 'lex will - Channels',

4813

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4814

'uploader': 'lex will',

4815

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4816

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4817

'channel': 'lex will',

4818

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4819

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4820

'tags': ['bible', 'history', 'prophesy'],

4821

'channel_follower_count': int

4822

},

4823

'playlist_mincount': 12,

4824

}, {

4825

'note': 'Search tab',

4826

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4827

'playlist_mincount': 40,

4828

'info_dict': {

4829

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4830

'title': '3Blue1Brown - Search - linear algebra',

4831

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4832

'uploader': '3Blue1Brown',

4833

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4834

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4835

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4836

'tags': ['Mathematics'],

4837

'channel': '3Blue1Brown',

4838

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4839

'channel_follower_count': int

4840

},

4841

}, {

4842

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4843

'only_matching': True,

4844

}, {

4845

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4846

'only_matching': True,

4847

}, {

4848

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4849

'only_matching': True,

4850

}, {

4851

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4852

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4853

'info_dict': {

4854

'title': '29C3: Not my department',

4855

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4856

'uploader': 'Christiaan008',

4857

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4858

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4859

'tags': [],

4860

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4861

'view_count': int,

4862

'modified_date': '20150605',

4863

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4864

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4865

'channel': 'Christiaan008',

4866

},

4867

'playlist_count': 96,

4868

}, {

4869

'note': 'Large playlist',

4870

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4871

'info_dict': {

4872

'title': 'Uploads from Cauchemar',

4873

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4874

'uploader': 'Cauchemar',

4875

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4876

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4877

'tags': [],

4878

'modified_date': r're:\d{8}',

4879

'channel': 'Cauchemar',

4880

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4881

'view_count': int,

4882

'description': '',

4883

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4884

},

4885

'playlist_mincount': 1123,

4886

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4887

}, {

4888

'note': 'even larger playlist, 8832 videos',

4889

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4890

'only_matching': True,

4891

}, {

4892

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4893

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4894

'info_dict': {

4895

'title': 'Uploads from Interstellar Movie',

4896

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4897

'uploader': 'Interstellar Movie',

4898

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4899

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4900

'tags': [],

4901

'view_count': int,

4902

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4903

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4904

'channel': 'Interstellar Movie',

4905

'description': '',

4906

'modified_date': r're:\d{8}',

4907

},

4908

'playlist_mincount': 21,

4909

}, {

4910

'note': 'Playlist with "show unavailable videos" button',

4911

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4912

'info_dict': {

4913

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4914

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4915

'uploader': 'Phim Siêu Nhân Nhật Bản',

4916

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4917

'view_count': int,

4918

'channel': 'Phim Siêu Nhân Nhật Bản',

4919

'tags': [],

4920

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4921

'description': '',

4922

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4923

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4924

'modified_date': r're:\d{8}',

4925

},

4926

'playlist_mincount': 200,

4927

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4928

}, {

4929

'note': 'Playlist with unavailable videos in page 7',

4930

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4931

'info_dict': {

4932

'title': 'Uploads from BlankTV',

4933

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4934

'uploader': 'BlankTV',

4935

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4936

'channel': 'BlankTV',

4937

'channel_url': 'https://www.youtube.com/c/blanktv',

4938

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4939

'view_count': int,

4940

'tags': [],

4941

'uploader_url': 'https://www.youtube.com/c/blanktv',

4942

'modified_date': r're:\d{8}',

4943

'description': '',

4944

},

4945

'playlist_mincount': 1000,

4946

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4947

}, {

4948

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4949

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4950

'info_dict': {

4951

'title': 'Data Analysis with Dr Mike Pound',

4952

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4953

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4954

'uploader': 'Computerphile',

4955

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4956

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4957

'tags': [],

4958

'view_count': int,

4959

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4960

'channel_url': 'https://www.youtube.com/user/Computerphile',

4961

'channel': 'Computerphile',

4962

},

4963

'playlist_mincount': 11,

4964

}, {

4965

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4966

'only_matching': True,

4967

}, {

4968

'note': 'Playlist URL that does not actually serve a playlist',

4969

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4974

'uploader': 'STREEM',

4975

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4976

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4977

'upload_date': '20150526',

4978

'license': 'Standard YouTube License',

4979

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4980

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4987

},

4988

'skip': 'This video is not available.',

4989

'add_ie': [YoutubeIE.ie_key()],

4990

}, {

4991

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4992

'only_matching': True,

4993

}, {

4994

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

4995

'only_matching': True,

4996

}, {

4997

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

4998

'info_dict': {

4999

'id': 'GgL890LIznQ', # This will keep changing

5000

'ext': 'mp4',

5001

'title': str,

5002

'uploader': 'Sky News',

5003

'uploader_id': 'skynews',

5004

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5005

'upload_date': r're:\d{8}',

5006

'description': str,

5007

'categories': ['News & Politics'],

5008

'tags': list,

5009

'like_count': int,

5010

'release_timestamp': 1642502819,

5011

'channel': 'Sky News',

5012

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5013

'age_limit': 0,

5014

'view_count': int,

5015

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

5016

'playable_in_embed': True,

5017

'release_date': '20220118',

5018

'availability': 'public',

5019

'live_status': 'is_live',

5020

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5021

'channel_follower_count': int

5022

},

5023

'params': {

5024

'skip_download': True,

5025

},

5026

'expected_warnings': ['Ignoring subtitle tracks found in '],

5027

}, {

5028

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5033

'uploader': 'The Young Turks',

5034

'uploader_id': 'TheYoungTurks',

5035

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5036

'upload_date': '20150715',

5037

'license': 'Standard YouTube License',

5038

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5039

'categories': ['News & Politics'],

5040

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5045

},

5046

'only_matching': True,

5047

}, {

5048

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5049

'only_matching': True,

5050

}, {

5051

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5052

'only_matching': True,

5053

}, {

5054

'note': 'A channel that is not live. Should raise error',

5055

'url': 'https://www.youtube.com/user/numberphile/live',

5056

'only_matching': True,

5057

}, {

5058

'url': 'https://www.youtube.com/feed/trending',

5059

'only_matching': True,

5060

}, {

5061

'url': 'https://www.youtube.com/feed/library',

5062

'only_matching': True,

5063

}, {

5064

'url': 'https://www.youtube.com/feed/history',

5065

'only_matching': True,

5066

}, {

5067

'url': 'https://www.youtube.com/feed/subscriptions',

5068

'only_matching': True,

5069

}, {

5070

'url': 'https://www.youtube.com/feed/watch_later',

5071

'only_matching': True,

5072

}, {

5073

'note': 'Recommended - redirects to home page.',

5074

'url': 'https://www.youtube.com/feed/recommended',

5075

'only_matching': True,

5076

}, {

5077

'note': 'inline playlist with not always working continuations',

5078

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5079

'only_matching': True,

5080

}, {

5081

'url': 'https://www.youtube.com/course',

5082

'only_matching': True,

5083

}, {

5084

'url': 'https://www.youtube.com/zsecurity',

5085

'only_matching': True,

5086

}, {

5087

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5088

'only_matching': True,

5089

}, {

5090

'url': 'https://www.youtube.com/TheYoungTurks/live',

5091

'only_matching': True,

5092

}, {

5093

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5100

}, {

5101

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5102

'only_matching': True,

5103

}, {

5104

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5105

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5106

'only_matching': True

5107

}, {

5108

'note': '/browse/ should redirect to /channel/',

5109

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5110

'only_matching': True

5111

}, {

5112

'note': 'VLPL, should redirect to playlist?list=PL...',

5113

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5114

'info_dict': {

5115

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5116

'uploader': 'NoCopyrightSounds',

5117

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5118

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5119

'title': 'NCS Releases',

5120

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5121

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5122

'modified_date': r're:\d{8}',

5123

'view_count': int,

5124

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5125

'tags': [],

5126

'channel': 'NoCopyrightSounds',

5127

},

5128

'playlist_mincount': 166,

5129

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5130

}, {

5131

'note': 'Topic, should redirect to playlist?list=UU...',

5132

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5133

'info_dict': {

5134

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5135

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5136

'title': 'Uploads from Royalty Free Music - Topic',

5137

'uploader': 'Royalty Free Music - Topic',

5138

'tags': [],

5139

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5140

'channel': 'Royalty Free Music - Topic',

5141

'view_count': int,

5142

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5143

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5144

'modified_date': r're:\d{8}',

5145

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5146

'description': '',

5147

},

5148

'expected_warnings': [

5149

'The URL does not have a videos tab',

5150

r'[Uu]navailable videos (are|will be) hidden',

5151

],

5152

'playlist_mincount': 101,

5153

}, {

5154

'note': 'Topic without a UU playlist',

5155

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5156

'info_dict': {

5157

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5158

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5159

'tags': [],

5160

},

5161

'expected_warnings': [

5162

'the playlist redirect gave error',

5163

],

5164

'playlist_mincount': 9,

5165

}, {

5166

'note': 'Youtube music Album',

5167

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5168

'info_dict': {

5169

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5170

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5175

'modified_date': r're:\d{8}',

5176

},

5177

'playlist_count': 50,

5178

}, {

5179

'note': 'unlisted single video playlist',

5180

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5181

'info_dict': {

5182

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5183

'uploader': 'colethedj',

5184

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5185

'title': 'yt-dlp unlisted playlist test',

5186

'availability': 'unlisted',

5187

'tags': [],

5188

'modified_date': '20211208',

5189

'channel': 'colethedj',

5190

'view_count': int,

5191

'description': '',

5192

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5193

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5194

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5199

'url': 'https://www.youtube.com/feed/recommended',

5200

'info_dict': {

5201

'id': 'recommended',

5202

'title': 'recommended',

5203

'tags': [],

5204

},

5205

'playlist_mincount': 50,

5206

'params': {

5207

'skip_download': True,

5208

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5209

},

5210

}, {

5211

'note': 'API Fallback: /videos tab, sorted by oldest first',

5212

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5213

'info_dict': {

5214

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5215

'title': 'Cody\'sLab - Videos',

5216

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5217

'uploader': 'Cody\'sLab',

5218

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5219

'channel': 'Cody\'sLab',

5220

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5221

'tags': [],

5222

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5223

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5224

'channel_follower_count': int

5225

},

5226

'playlist_mincount': 650,

5227

'params': {

5228

'skip_download': True,

5229

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5230

},

5231

}, {

5232

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5233

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5234

'info_dict': {

5235

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5236

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5237

'title': 'Uploads from Royalty Free Music - Topic',

5238

'uploader': 'Royalty Free Music - Topic',

5239

'modified_date': r're:\d{8}',

5240

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5241

'description': '',

5242

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5243

'tags': [],

5244

'channel': 'Royalty Free Music - Topic',

5245

'view_count': int,

5246

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5247

},

5248

'expected_warnings': [

5249

'does not have a videos tab',

5250

r'[Uu]navailable videos (are|will be) hidden',

5251

],

5252

'playlist_mincount': 101,

5253

'params': {

5254

'skip_download': True,

5255

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5256

},

5257

}, {

5258

'note': 'non-standard redirect to regional channel',

5259

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5260

'only_matching': True

5261

}, {

5262

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5263

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5264

'info_dict': {

5265

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5266

'modified_date': '20220407',

5267

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5268

'tags': [],

5269

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5270

'uploader': 'pukkandan',

5271

'availability': 'unlisted',

5272

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5273

'channel': 'pukkandan',

5274

'description': 'Test for collaborative playlist',

5275

'title': 'yt-dlp test - collaborative playlist',

5276

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5277

},

5278

'playlist_mincount': 2

}]

@classmethod
def suitable(cls, url):
    """Return False for any URL YoutubeIE accepts; otherwise use the inherited matcher."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5284

5285

# Splits a URL into `pre` (the part matched by _VALID_URL), an optional `tab`
# ("/<word>", only attempted when the `not_channel` group did not participate
# in the match) and `post` (everything that remains).
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5286

5287

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab-style URL (channel tab, playlist, watch URL with a list) to its result.

    The host is forced to www.youtube.com and the URL is split with _URL_RE.
    URLs flagged as music URLs in ``smuggled_data`` are rewritten, and channel
    URLs without a tab get "/videos" appended unless the
    'no-youtube-channel-redirect' compat option is set.

    Returns the value of ``_extract_from_tabs`` / ``_extract_from_playlist``,
    or a ``url_result`` pointing at another URL (single video, regional
    redirect, or the playlist an album resolves to).

    Raises ExtractorError when the page cannot be recognised, when the /live
    tab was requested but the tabs page was returned, when an explicitly
    requested tab is not the selected one, or when a music album cannot be
    resolved to a playlist.
    """
    item_id = self._match_id(url)
    # Normalise the host so that every later request goes to www.youtube.com
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Groups that did not participate in the match are None; replace them
        # with '' so callers can use string methods on every value
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=compat_str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user actually asked for; `tab` may be replaced below
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Re-assemble and re-match, since pre/tab/post may have been rewritten above
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and trailing part when following the redirect
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        # The tab titled "home" is addressed as "/featured" in URLs
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]  # strip the leading "/"
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if not original_tab_name:
                    # The tab was added by this method, not the user: fall back instead of failing
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    # The user explicitly asked for a tab that is not the selected one
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Neither tabs nor a playlist: fall back to a single video if one is known
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')

5416

5417

5418

class YoutubePlaylistIE(InfoExtractor):
    # Matches a bare playlist ID, or a youtube(kids).com / _INVIDIOUS_SITES URL
    # whose query string carries a `list=` parameter. Extraction itself is
    # delegated to YoutubeTabIE.
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a non-empty `v=`
        parameter; otherwise fall back to the inherited matcher."""
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs is already imported at module level (and used that way in
        # _real_extract), so no function-local import is needed here.
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rebuild the input as a https://www.youtube.com/playlist URL and hand it to YoutubeTabIE.

        The original query string is kept when there is one; otherwise
        `list=<playlist_id>` is used. Music URLs are tagged through smuggle_url.
        """
        playlist_id = self._match_id(url)
        # Must be evaluated before `url` is overwritten with the canonical form
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5527

5528

5529

class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that also carry a `list=` parameter by
    # rewriting them to the equivalent www.youtube.com/watch URL.
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the watch URL with `v`, `list` and `feature=youtu.be` and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5576

5577

5578

class YoutubeLivestreamEmbedIE(InfoExtractor):
    # Maps /embed/live_stream?channel=<id> URLs onto the channel's /live tab.
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's /live URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5591

5592

5593

class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shorthand for a user's /videos page.
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the user's /videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5607

5608

5609

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" keyword (and spelling variants); forwards to the "LL" playlist.
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed playlist URL for list=LL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

5626

5627

5628

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    # ":ytnotif" keyword; pages through the notification menu endpoint and
    # yields one url-type entry per notification that can be resolved.
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield entries for one response page.

        ``continuation_list[0]`` is reset to None and then set to the last
        ``continuationItemRenderer`` found among the items, if any.
        """
        first_page_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
        next_page_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, first_page_path, next_page_path, expected_type=list) or []
        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Turn one notification renderer into a url-type result dict.

        Returns None when the notification has no video ID and no channel ID
        plus post ID to build a community-post URL from.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            channel_id = None
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            canonical_url = traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(r'/post/(.+)', canonical_url, 'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        message = self._get_text(notification, 'shortMessage')
        if message:
            message = message.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', message, 'video title', default=None)

        # The sent-time text is only converted to a date when the user opted in
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            upload_date = strftime_or_none(
                self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from every notification page until no continuation is left."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            # Visitor data is taken from the previous response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            query = {'ctoken': ctoken} if ctoken else {}
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return a playlist result whose entries are the notification menu entries."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)

5717

5718

5719

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearch" search key; declarations only.
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearchdate" search key; declarations only.
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles /results and /search URLs; the query comes from `search_query`
    # or `q`, and the optional `sp` parameter is forwarded to the search.
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            'entries': [{
                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                'title': '#cats',
            }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results, using the query as both ID and title."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q` when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, search_params), query, query)

5787

5788

5789

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles music.youtube.com/search URLs. A section is selected either by
    # the `sp` parameter or, when `sp` is absent, by the URL fragment.
    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name -> value of the `sp` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of music search results titled "<query> - <section>"."""
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Map a known `sp` value back to its section name; an unknown
            # value is used as the section label as-is
            section = next(
                (name for name, token in self._SECTIONS.items() if token == params), params)
        else:
            # No `sp`: the text between the first and second '#' names the section
            fragment = (url.split('#') + [''])[1]
            section = compat_urllib_parse_unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

5840

5841

5842

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Shared base for the feed extractors.
    Every subclass has to override the _FEED_NAME property.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # Borrow the login check from YoutubeBaseInfoExtractor, which this
        # class does not inherit from
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        """Extractor name of the form "youtube:<feed name>"."""
        return f'youtube:{cls._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /feed/<feed name> URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5860

5861

5862

class YoutubeWatchLaterIE(InfoExtractor):
    # ":ytwatchlater" keyword; forwards to the "WL" playlist.
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed playlist URL for list=WL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5874

5875

5876

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # "recommended" feed; matches the bare youtube.com home URL and the
    # ":ytrec" / ":ytrecommended" keywords. Login is not required.
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # "subscriptions" feed; ":ytsubs" keyword and spelling variants.
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "history" feed, selected via the
    # ":ythis" / ":ythistory" keywords
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]

class YoutubeStoriesIE(InfoExtractor):
    # Prefix extractor: "ytstories:UC<id>" is rewritten into a playlist URL
    # whose list id is "RLTD" followed by the matched id group
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', 'only_matching': True},
    ]

    def _real_extract(self, url):
        matched_id = self._match_id(url)
        playlist_id = f'RLTD{matched_id}'
        stories_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        return self.url_result(stories_url, ie=YoutubeTabIE, video_id=playlist_id)

5930

5931

5932

class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution_link URLs that end right after a single
    # non-video query parameter (or with no parameter at all) and reports a
    # helpful error instead of attempting extraction.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL.

        Raises:
            ExtractorError: unconditionally, with expected=True.
        """
        # The hint names this project's executable (yt-dlp); the previous
        # text pointed users at a different program's command name.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(InfoExtractor):
    # Matches /clip/ URLs, emits a warning and passes the same URL on to the
    # extractor identified by the 'Generic' key
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, 'Generic')

5989

5990

5991

class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose v= value is only 1-10 characters long and
    # reports them as truncated instead of attempting extraction
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)