jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import hashlib
	6	import itertools
	7	import json
	8	import math
	9	import os.path
	10	import random
	11	import re
	12	import sys
	13	import threading
	14	import time
	15	import traceback
	16
	17	from .common import InfoExtractor, SearchInfoExtractor
	18	from ..compat import functools # isort: split
	19	from ..compat import (
	20	compat_chr,
	21	compat_HTTPError,
	22	compat_parse_qs,
	23	compat_str,
	24	compat_urllib_parse_unquote_plus,
	25	compat_urllib_parse_urlencode,
	26	compat_urllib_parse_urlparse,
	27	compat_urlparse,
	28	)
	29	from ..jsinterp import JSInterpreter
	30	from ..utils import (
	31	NO_DEFAULT,
	32	ExtractorError,
	33	bug_reports_message,
	34	classproperty,
	35	clean_html,
	36	datetime_from_str,
	37	dict_get,
	38	error_to_compat_str,
	39	float_or_none,
	40	format_field,
	41	get_first,
	42	int_or_none,
	43	is_html,
	44	join_nonempty,
	45	js_to_json,
	46	mimetype2ext,
	47	network_exceptions,
	48	orderedSet,
	49	parse_codecs,
	50	parse_count,
	51	parse_duration,
	52	parse_iso8601,
	53	parse_qs,
	54	qualities,
	55	remove_end,
	56	remove_start,
	57	smuggle_url,
	58	str_or_none,
	59	str_to_int,
	60	strftime_or_none,
	61	traverse_obj,
	62	try_get,
	63	unescapeHTML,
	64	unified_strdate,
	65	unified_timestamp,
	66	unsmuggle_url,
	67	update_url_query,
	68	url_or_none,
	69	urljoin,
	70	variadic,
	71	)
	72
	73	# any clients starting with _ cannot be explicitly requested by the user
	74	INNERTUBE_CLIENTS = {
	75	'web': {
	76	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	77	'INNERTUBE_CONTEXT': {
	78	'client': {
	79	'clientName': 'WEB',
	80	'clientVersion': '2.20211221.00.00',
	81	}
	82	},
	83	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	84	},
	85	'web_embedded': {
	86	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	87	'INNERTUBE_CONTEXT': {
	88	'client': {
	89	'clientName': 'WEB_EMBEDDED_PLAYER',
	90	'clientVersion': '1.20211215.00.01',
	91	},
	92	},
	93	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	94	},
	95	'web_music': {
	96	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	97	'INNERTUBE_HOST': 'music.youtube.com',
	98	'INNERTUBE_CONTEXT': {
	99	'client': {
	100	'clientName': 'WEB_REMIX',
	101	'clientVersion': '1.20211213.00.00',
	102	}
	103	},
	104	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	105	},
	106	'web_creator': {
	107	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	108	'INNERTUBE_CONTEXT': {
	109	'client': {
	110	'clientName': 'WEB_CREATOR',
	111	'clientVersion': '1.20211220.02.00',
	112	}
	113	},
	114	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	115	},
	116	'android': {
	117	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	118	'INNERTUBE_CONTEXT': {
	119	'client': {
	120	'clientName': 'ANDROID',
	121	'clientVersion': '16.49',
	122	}
	123	},
	124	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	125	'REQUIRE_JS_PLAYER': False
	126	},
	127	'android_embedded': {
	128	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	129	'INNERTUBE_CONTEXT': {
	130	'client': {
	131	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	132	'clientVersion': '16.49',
	133	},
	134	},
	135	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	136	'REQUIRE_JS_PLAYER': False
	137	},
	138	'android_music': {
	139	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	140	'INNERTUBE_CONTEXT': {
	141	'client': {
	142	'clientName': 'ANDROID_MUSIC',
	143	'clientVersion': '4.57',
	144	}
	145	},
	146	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	147	'REQUIRE_JS_PLAYER': False
	148	},
	149	'android_creator': {
	150	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	151	'INNERTUBE_CONTEXT': {
	152	'client': {
	153	'clientName': 'ANDROID_CREATOR',
	154	'clientVersion': '21.47',
	155	},
	156	},
	157	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	158	'REQUIRE_JS_PLAYER': False
	159	},
	160	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	161	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	162	'ios': {
	163	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	164	'INNERTUBE_CONTEXT': {
	165	'client': {
	166	'clientName': 'IOS',
	167	'clientVersion': '16.46',
	168	'deviceModel': 'iPhone14,3',
	169	}
	170	},
	171	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	172	'REQUIRE_JS_PLAYER': False
	173	},
	174	'ios_embedded': {
	175	'INNERTUBE_CONTEXT': {
	176	'client': {
	177	'clientName': 'IOS_MESSAGES_EXTENSION',
	178	'clientVersion': '16.46',
	179	'deviceModel': 'iPhone14,3',
	180	},
	181	},
	182	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	183	'REQUIRE_JS_PLAYER': False
	184	},
	185	'ios_music': {
	186	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	187	'INNERTUBE_CONTEXT': {
	188	'client': {
	189	'clientName': 'IOS_MUSIC',
	190	'clientVersion': '4.57',
	191	},
	192	},
	193	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	194	'REQUIRE_JS_PLAYER': False
	195	},
	196	'ios_creator': {
	197	'INNERTUBE_CONTEXT': {
	198	'client': {
	199	'clientName': 'IOS_CREATOR',
	200	'clientVersion': '21.47',
	201	},
	202	},
	203	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	204	'REQUIRE_JS_PLAYER': False
	205	},
	206	# mweb has 'ultralow' formats
	207	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	208	'mweb': {
	209	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	210	'INNERTUBE_CONTEXT': {
	211	'client': {
	212	'clientName': 'MWEB',
	213	'clientVersion': '2.20211221.01.00',
	214	}
	215	},
	216	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	217	},
	218	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	219	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	220	'tv_embedded': {
	221	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	222	'INNERTUBE_CONTEXT': {
	223	'client': {
	224	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	225	'clientVersion': '2.0',
	226	},
	227	},
	228	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	229	},
	230	}
	231
	232
	233	def _split_innertube_client(client_name):
	234	variant, *base = client_name.rsplit('.', 1)
	235	if base:
	236	return variant, base[0], variant
	237	base, *variant = client_name.split('_', 1)
	238	return client_name, base, variant[0] if variant else None
	239
	240
	241	def build_innertube_clients():
	242	THIRD_PARTY = {
	243	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	244	}
	245	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	246	priority = qualities(BASE_CLIENTS[::-1])
	247
	248	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	249	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	250	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	251	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	252	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	253
	254	_, base_client, variant = _split_innertube_client(client)
	255	ytcfg['priority'] = 10 * priority(base_client)
	256
	257	if not variant:
	258	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	259	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	260	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	261	embedscreen['priority'] -= 3
	262	elif variant == 'embedded':
	263	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	264	ytcfg['priority'] -= 2
	265	else:
	266	ytcfg['priority'] -= 3
	267
	268
	269	build_innertube_clients()
	270
	271
	272	class YoutubeBaseInfoExtractor(InfoExtractor):
	273	"""Provide base functions for Youtube extractors"""
	274
	275	_RESERVED_NAMES = (
	276	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	277	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	278	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	279	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	280
	281	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	282
	283	# _NETRC_MACHINE = 'youtube'
	284
	285	# If True it will raise an error if no login info is provided
	286	_LOGIN_REQUIRED = False
	287
	288	_INVIDIOUS_SITES = (
	289	# invidious-redirect websites
	290	r'(?:www\.)?redirect\.invidious\.io',
	291	r'(?:(?:www\|dev)\.)?invidio\.us',
	292	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	293	r'(?:www\.)?invidious\.pussthecat\.org',
	294	r'(?:www\.)?invidious\.zee\.li',
	295	r'(?:www\.)?invidious\.ethibox\.fr',
	296	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	297	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	298	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	299	# youtube-dl invidious instances list
	300	r'(?:(?:www\|no)\.)?invidiou\.sh',
	301	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	302	r'(?:www\.)?invidious\.kabi\.tk',
	303	r'(?:www\.)?invidious\.mastodon\.host',
	304	r'(?:www\.)?invidious\.zapashcanon\.fr',
	305	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	306	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	307	r'(?:www\.)?invidious\.himiko\.cloud',
	308	r'(?:www\.)?invidious\.reallyancient\.tech',
	309	r'(?:www\.)?invidious\.tube',
	310	r'(?:www\.)?invidiou\.site',
	311	r'(?:www\.)?invidious\.site',
	312	r'(?:www\.)?invidious\.xyz',
	313	r'(?:www\.)?invidious\.nixnet\.xyz',
	314	r'(?:www\.)?invidious\.048596\.xyz',
	315	r'(?:www\.)?invidious\.drycat\.fr',
	316	r'(?:www\.)?inv\.skyn3t\.in',
	317	r'(?:www\.)?tube\.poal\.co',
	318	r'(?:www\.)?tube\.connect\.cafe',
	319	r'(?:www\.)?vid\.wxzm\.sx',
	320	r'(?:www\.)?vid\.mint\.lgbt',
	321	r'(?:www\.)?vid\.puffyan\.us',
	322	r'(?:www\.)?yewtu\.be',
	323	r'(?:www\.)?yt\.elukerio\.org',
	324	r'(?:www\.)?yt\.lelux\.fi',
	325	r'(?:www\.)?invidious\.ggc-project\.de',
	326	r'(?:www\.)?yt\.maisputain\.ovh',
	327	r'(?:www\.)?ytprivate\.com',
	328	r'(?:www\.)?invidious\.13ad\.de',
	329	r'(?:www\.)?invidious\.toot\.koeln',
	330	r'(?:www\.)?invidious\.fdn\.fr',
	331	r'(?:www\.)?watch\.nettohikari\.com',
	332	r'(?:www\.)?invidious\.namazso\.eu',
	333	r'(?:www\.)?invidious\.silkky\.cloud',
	334	r'(?:www\.)?invidious\.exonip\.de',
	335	r'(?:www\.)?invidious\.riverside\.rocks',
	336	r'(?:www\.)?invidious\.blamefran\.net',
	337	r'(?:www\.)?invidious\.moomoo\.de',
	338	r'(?:www\.)?ytb\.trom\.tf',
	339	r'(?:www\.)?yt\.cyberhost\.uk',
	340	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	341	r'(?:www\.)?qklhadlycap4cnod\.onion',
	342	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	343	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	344	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	345	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	346	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	347	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	348	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	349	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	350	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	351	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	352	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	353	r'(?:www\.)?piped\.kavin\.rocks',
	354	r'(?:www\.)?piped\.silkky\.cloud',
	355	r'(?:www\.)?piped\.tokhmi\.xyz',
	356	r'(?:www\.)?piped\.moomoo\.me',
	357	r'(?:www\.)?il\.ax',
	358	r'(?:www\.)?piped\.syncpundit\.com',
	359	r'(?:www\.)?piped\.mha\.fi',
	360	r'(?:www\.)?piped\.mint\.lgbt',
	361	r'(?:www\.)?piped\.privacy\.com\.de',
	362	)
	363
	364	def _initialize_consent(self):
	365	cookies = self._get_cookies('https://www.youtube.com/')
	366	if cookies.get('__Secure-3PSID'):
	367	return
	368	consent_id = None
	369	consent = cookies.get('CONSENT')
	370	if consent:
	371	if 'YES' in consent.value:
	372	return
	373	consent_id = self._search_regex(
	374	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	375	if not consent_id:
	376	consent_id = random.randint(100, 999)
	377	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	378
	379	def _initialize_pref(self):
	380	cookies = self._get_cookies('https://www.youtube.com/')
	381	pref_cookie = cookies.get('PREF')
	382	pref = {}
	383	if pref_cookie:
	384	try:
	385	pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
	386	except ValueError:
	387	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	388	pref.update({'hl': 'en', 'tz': 'UTC'})
	389	self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
	390
	391	def _real_initialize(self):
	392	self._initialize_pref()
	393	self._initialize_consent()
	394	self._check_login_required()
	395
	396	def _check_login_required(self):
	397	if self._LOGIN_REQUIRED and not self._cookies_passed:
	398	self.raise_login_required('Login details are needed to download this content', method='cookies')
	399
	400	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s=\s({.+})\s;'
	401	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s=\s({.+})\s*;'
	402	_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta\|</script\|\n)'
	403
	404	def _get_default_ytcfg(self, client='web'):
	405	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	406
	407	def _get_innertube_host(self, client='web'):
	408	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	409
	410	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	411	# try_get but with fallback to default ytcfg client values when present
	412	_func = lambda y: try_get(y, getter, expected_type)
	413	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	414
	415	def _extract_client_name(self, ytcfg, default_client='web'):
	416	return self._ytcfg_get_safe(
	417	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	418	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
	419
	420	def _extract_client_version(self, ytcfg, default_client='web'):
	421	return self._ytcfg_get_safe(
	422	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	423	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
	424
	425	def _extract_api_key(self, ytcfg=None, default_client='web'):
	426	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
	427
	428	def _extract_context(self, ytcfg=None, default_client='web'):
	429	context = get_first(
	430	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	431	# Enforce language and tz for extraction
	432	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	433	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	434	return context
	435
	436	_SAPISID = None
	437
	438	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	439	time_now = round(time.time())
	440	if self._SAPISID is None:
	441	yt_cookies = self._get_cookies('https://www.youtube.com')
	442	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	443	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	444	sapisid_cookie = dict_get(
	445	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	446	if sapisid_cookie and sapisid_cookie.value:
	447	self._SAPISID = sapisid_cookie.value
	448	self.write_debug('Extracted SAPISID cookie')
	449	# SAPISID cookie is required if not already present
	450	if not yt_cookies.get('SAPISID'):
	451	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	452	self._set_cookie(
	453	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	454	else:
	455	self._SAPISID = False
	456	if not self._SAPISID:
	457	return None
	458	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	459	sapisidhash = hashlib.sha1(
	460	f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
	461	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	462
	463	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	464	note='Downloading API JSON', errnote='Unable to download API page',
	465	context=None, api_key=None, api_hostname=None, default_client='web'):
	466
	467	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	468	data.update(query)
	469	real_headers = self.generate_api_headers(default_client=default_client)
	470	real_headers.update({'content-type': 'application/json'})
	471	if headers:
	472	real_headers.update(headers)
	473	return self._download_json(
	474	f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',
	475	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	476	data=json.dumps(data).encode('utf8'), headers=real_headers,
	477	query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
	478
	479	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	480	data = self._search_regex(
	481	(fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}',
	482	self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
	483	if data:
	484	return self._parse_json(data, item_id, fatal=fatal)
	485
	486	def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
	487	return self._parse_json(self._search_regex(
	488	(fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',
	489	regex), webpage, name, default='{}'), video_id, fatal=False, lenient=True)
	490
	491	@staticmethod
	492	def _extract_session_index(*data):
	493	"""
	494	Index of current account in account list.
	495	See: https://github.com/yt-dlp/yt-dlp/pull/519
	496	"""
	497	for ytcfg in data:
	498	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	499	if session_index is not None:
	500	return session_index

1

import base64

import calendar

import copy

import datetime

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

from .common import InfoExtractor, SearchInfoExtractor

18

from ..compat import functools # isort: split

19

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

25

compat_urllib_parse_urlencode,

26

compat_urllib_parse_urlparse,

27

compat_urlparse,

28

)

29

from ..jsinterp import JSInterpreter

30

from ..utils import (

NO_DEFAULT,

ExtractorError,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

74

INNERTUBE_CLIENTS = {

75

'web': {

76

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

77

'INNERTUBE_CONTEXT': {

78

'client': {

79

'clientName': 'WEB',

80

'clientVersion': '2.20211221.00.00',

81

}

82

},

83

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

84

},

85

'web_embedded': {

86

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

87

'INNERTUBE_CONTEXT': {

88

'client': {

89

'clientName': 'WEB_EMBEDDED_PLAYER',

90

'clientVersion': '1.20211215.00.01',

91

},

92

},

93

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

94

},

95

'web_music': {

96

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

97

'INNERTUBE_HOST': 'music.youtube.com',

98

'INNERTUBE_CONTEXT': {

99

'client': {

100

'clientName': 'WEB_REMIX',

101

'clientVersion': '1.20211213.00.00',

102

}

103

},

104

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

105

},

106

'web_creator': {

107

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

108

'INNERTUBE_CONTEXT': {

109

'client': {

110

'clientName': 'WEB_CREATOR',

111

'clientVersion': '1.20211220.02.00',

112

}

113

},

114

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

115

},

116

'android': {

117

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

118

'INNERTUBE_CONTEXT': {

119

'client': {

120

'clientName': 'ANDROID',

121

'clientVersion': '16.49',

122

}

123

},

124

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

125

'REQUIRE_JS_PLAYER': False

126

},

127

'android_embedded': {

128

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

129

'INNERTUBE_CONTEXT': {

130

'client': {

131

'clientName': 'ANDROID_EMBEDDED_PLAYER',

132

'clientVersion': '16.49',

133

},

134

},

135

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

136

'REQUIRE_JS_PLAYER': False

137

},

138

'android_music': {

139

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

140

'INNERTUBE_CONTEXT': {

141

'client': {

142

'clientName': 'ANDROID_MUSIC',

143

'clientVersion': '4.57',

144

}

145

},

146

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

147

'REQUIRE_JS_PLAYER': False

148

},

149

'android_creator': {

150

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

151

'INNERTUBE_CONTEXT': {

152

'client': {

153

'clientName': 'ANDROID_CREATOR',

154

'clientVersion': '21.47',

155

},

156

},

157

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

158

'REQUIRE_JS_PLAYER': False

159

},

160

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

161

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

162

'ios': {

163

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

164

'INNERTUBE_CONTEXT': {

165

'client': {

166

'clientName': 'IOS',

167

'clientVersion': '16.46',

168

'deviceModel': 'iPhone14,3',

169

}

170

},

171

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

172

'REQUIRE_JS_PLAYER': False

173

},

174

'ios_embedded': {

175

'INNERTUBE_CONTEXT': {

176

'client': {

177

'clientName': 'IOS_MESSAGES_EXTENSION',

178

'clientVersion': '16.46',

179

'deviceModel': 'iPhone14,3',

180

},

181

},

182

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

183

'REQUIRE_JS_PLAYER': False

184

},

185

'ios_music': {

186

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

187

'INNERTUBE_CONTEXT': {

188

'client': {

189

'clientName': 'IOS_MUSIC',

190

'clientVersion': '4.57',

191

},

192

},

193

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

194

'REQUIRE_JS_PLAYER': False

195

},

196

'ios_creator': {

197

'INNERTUBE_CONTEXT': {

198

'client': {

199

'clientName': 'IOS_CREATOR',

200

'clientVersion': '21.47',

201

},

202

},

203

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

204

'REQUIRE_JS_PLAYER': False

205

},

206

# mweb has 'ultralow' formats

207

# See: https://github.com/yt-dlp/yt-dlp/pull/557

208

'mweb': {

209

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

210

'INNERTUBE_CONTEXT': {

211

'client': {

212

'clientName': 'MWEB',

213

'clientVersion': '2.20211221.01.00',

214

}

215

},

216

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

217

},

218

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

219

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

220

'tv_embedded': {

221

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

222

'INNERTUBE_CONTEXT': {

223

'client': {

224

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

225

'clientVersion': '2.0',

226

},

227

},

228

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

234

variant, *base = client_name.rsplit('.', 1)

235

if base:

236

return variant, base[0], variant

237

base, *variant = client_name.split('_', 1)

238

return client_name, base, variant[0] if variant else None

239

240

241

def build_innertube_clients():

242

THIRD_PARTY = {

243

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

244

}

245

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

246

priority = qualities(BASE_CLIENTS[::-1])

247

248

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

249

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

250

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

251

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

252

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

253

254

_, base_client, variant = _split_innertube_client(client)

255

ytcfg['priority'] = 10 * priority(base_client)

256

257

if not variant:

258

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

259

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

260

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

261

embedscreen['priority'] -= 3

262

elif variant == 'embedded':

263

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

264

ytcfg['priority'] -= 2

265

else:

266

ytcfg['priority'] -= 3

267

268

269

build_innertube_clients()

270

271

272

class YoutubeBaseInfoExtractor(InfoExtractor):

273

"""Provide base functions for Youtube extractors"""

274

275

_RESERVED_NAMES = (

276

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

282

283

# _NETRC_MACHINE = 'youtube'

284

285

# If True it will raise an error if no login info is provided

286

_LOGIN_REQUIRED = False

287

288

_INVIDIOUS_SITES = (

289

# invidious-redirect websites

290

r'(?:www\.)?redirect\.invidious\.io',

291

r'(?:(?:www|dev)\.)?invidio\.us',

292

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

293

r'(?:www\.)?invidious\.pussthecat\.org',

294

r'(?:www\.)?invidious\.zee\.li',

295

r'(?:www\.)?invidious\.ethibox\.fr',

296

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

297

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

298

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

299

# youtube-dl invidious instances list

300

r'(?:(?:www|no)\.)?invidiou\.sh',

301

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

302

r'(?:www\.)?invidious\.kabi\.tk',

303

r'(?:www\.)?invidious\.mastodon\.host',

304

r'(?:www\.)?invidious\.zapashcanon\.fr',

305

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

306

r'(?:www\.)?invidious\.tinfoil-hat\.net',

307

r'(?:www\.)?invidious\.himiko\.cloud',

308

r'(?:www\.)?invidious\.reallyancient\.tech',

309

r'(?:www\.)?invidious\.tube',

310

r'(?:www\.)?invidiou\.site',

311

r'(?:www\.)?invidious\.site',

312

r'(?:www\.)?invidious\.xyz',

313

r'(?:www\.)?invidious\.nixnet\.xyz',

314

r'(?:www\.)?invidious\.048596\.xyz',

315

r'(?:www\.)?invidious\.drycat\.fr',

316

r'(?:www\.)?inv\.skyn3t\.in',

317

r'(?:www\.)?tube\.poal\.co',

318

r'(?:www\.)?tube\.connect\.cafe',

319

r'(?:www\.)?vid\.wxzm\.sx',

320

r'(?:www\.)?vid\.mint\.lgbt',

321

r'(?:www\.)?vid\.puffyan\.us',

322

r'(?:www\.)?yewtu\.be',

323

r'(?:www\.)?yt\.elukerio\.org',

324

r'(?:www\.)?yt\.lelux\.fi',

325

r'(?:www\.)?invidious\.ggc-project\.de',

326

r'(?:www\.)?yt\.maisputain\.ovh',

327

r'(?:www\.)?ytprivate\.com',

328

r'(?:www\.)?invidious\.13ad\.de',

329

r'(?:www\.)?invidious\.toot\.koeln',

330

r'(?:www\.)?invidious\.fdn\.fr',

331

r'(?:www\.)?watch\.nettohikari\.com',

332

r'(?:www\.)?invidious\.namazso\.eu',

333

r'(?:www\.)?invidious\.silkky\.cloud',

334

r'(?:www\.)?invidious\.exonip\.de',

335

r'(?:www\.)?invidious\.riverside\.rocks',

336

r'(?:www\.)?invidious\.blamefran\.net',

337

r'(?:www\.)?invidious\.moomoo\.de',

338

r'(?:www\.)?ytb\.trom\.tf',

339

r'(?:www\.)?yt\.cyberhost\.uk',

340

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

341

r'(?:www\.)?qklhadlycap4cnod\.onion',

342

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

343

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

344

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

345

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

346

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

347

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

348

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

349

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

350

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

351

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

352

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

353

r'(?:www\.)?piped\.kavin\.rocks',

354

r'(?:www\.)?piped\.silkky\.cloud',

355

r'(?:www\.)?piped\.tokhmi\.xyz',

356

r'(?:www\.)?piped\.moomoo\.me',

357

r'(?:www\.)?il\.ax',

358

r'(?:www\.)?piped\.syncpundit\.com',

359

r'(?:www\.)?piped\.mha\.fi',

360

r'(?:www\.)?piped\.mint\.lgbt',

361

r'(?:www\.)?piped\.privacy\.com\.de',

362

)

363

364

def _initialize_consent(self):

365

cookies = self._get_cookies('https://www.youtube.com/')

366

if cookies.get('__Secure-3PSID'):

367

return

368

consent_id = None

369

consent = cookies.get('CONSENT')

370

if consent:

371

if 'YES' in consent.value:

372

return

373

consent_id = self._search_regex(

374

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

375

if not consent_id:

376

consent_id = random.randint(100, 999)

377

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

378

379

def _initialize_pref(self):

380

cookies = self._get_cookies('https://www.youtube.com/')

381

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))

386

except ValueError:

387

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

388

pref.update({'hl': 'en', 'tz': 'UTC'})

389

self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))

390

391

def _real_initialize(self):

392

self._initialize_pref()

393

self._initialize_consent()

394

self._check_login_required()

395

396

def _check_login_required(self):

397

if self._LOGIN_REQUIRED and not self._cookies_passed:

398

self.raise_login_required('Login details are needed to download this content', method='cookies')

399

400

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+})\s*;'

401

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+})\s*;'

402

_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

403

404

def _get_default_ytcfg(self, client='web'):

405

return copy.deepcopy(INNERTUBE_CLIENTS[client])

406

407

def _get_innertube_host(self, client='web'):

408

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

409

410

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

411

# try_get but with fallback to default ytcfg client values when present

412

_func = lambda y: try_get(y, getter, expected_type)

413

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

414

415

def _extract_client_name(self, ytcfg, default_client='web'):

416

return self._ytcfg_get_safe(

417

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

418

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

419

420

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version string from `ytcfg`, or from the defaults of `default_client`."""
    lookups = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, lookups, compat_str, default_client)

424

425

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, or from the defaults of `default_client`."""
    read_key = lambda cfg: cfg['INNERTUBE_API_KEY']
    return self._ytcfg_get_safe(ytcfg, read_key, compat_str, default_client)

427

428

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the 'INNERTUBE_CONTEXT' dict with language and timezone forced to en/UTC.

    The context is taken from `ytcfg` when it has one, otherwise from the
    default ytcfg of `default_client`. The returned dict is modified in place.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section.update(hl='en', timeZone='UTC', utcOffsetMinutes=0)
    return context

# Cached SAPISID cookie value used by _generate_sapisidhash_header:
# None = not looked up yet, False = looked up but no usable cookie was found
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Return a 'SAPISIDHASH <time>_<sha1>' authorization value, or None without a SAPISID cookie.

    The cookie lookup is done once and cached in self._SAPISID
    (False is stored when no usable cookie exists).
    """
    time_now = round(time.time())
    if self._SAPISID is None:
        cookie_jar = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        cookie = dict_get(cookie_jar, ('__Secure-3PAPISID', 'SAPISID'))
        if not (cookie and cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not cookie_jar.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
    if not self._SAPISID:
        return None
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    hash_input = f'{time_now} {self._SAPISID} {origin}'
    digest = hashlib.sha1(hash_input.encode()).hexdigest()
    return f'SAPISIDHASH {time_now}_{digest}'

462

463

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` as JSON to the innertube endpoint `ep` and return the parsed response.

    @param ep              endpoint name appended to '/youtubei/v1/'
    @param query           dict merged into the request body (may override 'context')
    @param headers         extra headers; these override the generated API headers
    @param context         innertube context; defaults to that of `default_client`
    @param api_key         API key; defaults to that of `default_client`
    @param api_hostname    API host; defaults to that of `default_client`
    Remaining parameters are passed through to _download_json.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    host = api_hostname or self._get_innertube_host(default_client)
    # Fall back to the key of the same client that supplies the context, headers
    # and host (previously the 'web' key was used regardless of default_client)
    key = api_key or self._extract_api_key(default_client=default_client)
    return self._download_json(
        f'https://{host}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': key, 'prettyPrint': 'false'})

478

479

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find the ytInitialData assignment in `webpage` and return it parsed as JSON.

    The boundary-anchored pattern is tried first, then the bare pattern.
    Returns None when nothing was found (only possible with fatal=False).
    """
    patterns = (
        fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}',
        self._YT_INITIAL_DATA_RE,
    )
    raw = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw:
        return None
    return self._parse_json(raw, item_id, fatal=fatal)

485

486

def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search `webpage` for `regex` (boundary-anchored first) and parse the match as JSON.

    Non-fatal: when nothing matches, the default '{}' is parsed instead.
    """
    patterns = (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}', regex)
    raw = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw, video_id, fatal=False, lenient=True)

490

491

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts to an int, else None.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    """
    parsed = (int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX'])) for cfg in data)
    return next((index for index in parsed if index is not None), None)

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from ytcfg['ID_TOKEN'], else from an ID_TOKEN entry in `webpage`, else None."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

512

513

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated:
            return delegated
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
    return None

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)

541

542

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated (computed once, then cached)."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

545

546

def extract_ytcfg(self, video_id, webpage):
    """Return the object passed to `ytcfg.set({...});` in `webpage` as a dict.

    Returns {} when `webpage` is empty, when no such call is found, or when
    the captured text is not valid JSON (parsing is non-fatal).
    """
    if not webpage:
        return {}
    # The parentheses of the call must be matched as escaped literals; the
    # previous pattern had `$` anchors in their place and could never match.
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_config, video_id, fatal=False) or {}

553

554

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an innertube API request.

    Explicit arguments take precedence; anything not given is read from `ytcfg`
    (client name/version fall back to the defaults of `default_client`).
    Authorization and X-Origin are added only when a SAPISIDHASH can be generated.
    Headers whose value is None are left out of the result.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(default_client))
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    candidates = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With a sync id but no known session index, account 0 is used
        candidates['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        candidates['Authorization'] = authorization
        candidates['X-Origin'] = origin
    return {header: value for header, value in candidates.items() if value is not None}

578

579

def _download_ytcfg(self, client, video_id):
    """Download the page associated with `client` and return the ytcfg found in it.

    Returns {} for clients without a known page, or when the download
    (non-fatal) or the ytcfg extraction yields nothing.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    url = config_pages.get(client)
    if not url:
        return {}
    note = f'Downloading {client.replace("_", " ").strip()} client config'
    webpage = self._download_webpage(url, video_id, fatal=False, note=note)
    return self.extract_ytcfg(video_id, webpage) or {}

590

591

@staticmethod

592

def _build_api_continuation_query(continuation, ctp=None):

593

query = {

594

'continuation': continuation

595

}

596

# TODO: Inconsistency with clickTrackingParams.

597

# Currently we have a fixed ctp contained within context (from ytcfg)

598

# and a ctp in root query for continuation.

599

if ctp:

600

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from the renderer's next/reload continuation data, if any.

    Returns None when no such data or no continuation token is present.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

615

616

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or carries no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

625

626

@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for `renderer`, or None if it has none.

    The renderer's own next/reload continuation data is preferred; otherwise the
    entries under 'contents' and 'items' are scanned for a continuationItemRenderer.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x, k=key: x[k], list) or []
    ]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(entry, endpoint_getters, dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
    return None

@classmethod
def _extract_alerts(cls, data):
    """Yield (alert_type, message) for every alert in data['alerts'].

    Alerts lacking a type or a message text are skipped, as are malformed
    (non-dict) entries.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Skip non-dict payloads instead of failing on `.get`,
            # mirroring the type check on the enclosing entry
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

659

660

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report alerts as warnings and raise on the last error-type alert.

    @param alerts     iterable of (alert_type, message) pairs
    @param expected   passed to the ExtractorError that is raised
    @param fatal      when false, error alerts are downgraded to warnings
    @param only_once  passed to report_warning
    Every alert except the last error is reported as a warning (non-errors first).
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        is_fatal_error = alert_type.lower() == 'error' and fatal
        (errors if is_fatal_error else warnings).append([alert_type, alert_message])

    for alert_type, alert_message in itertools.chain(warnings, errors[:-1]):
        self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
    if errors:
        last_message = errors[-1][1]
        raise ExtractorError('YouTube said: %s' % last_message, expected=expected)

673

674

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and hand them to _report_alerts with the remaining arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

676

677

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels found in renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or []
    )
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of the given paths, else None.

    For each candidate object 'simpleText' is preferred; otherwise the 'text'
    of its 'runs' (or of the object itself when it is a list) are joined.
    @param path_list  traversal paths into `data`; with none given, `data` itself is used
    @param max_runs   when truthy, only this many leading runs are joined
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # A path without branching keys yields a single object, not a list of them
            branches = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branches:
                candidates = [candidates]
        for item in candidates:
            simple = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple:
                return simple
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = max_runs or len(runs)
            runs = runs[:min(len(runs), limit)]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None

def _get_count(self, data, *path_list):
    """Parse a count from the text at the given paths.

    parse_count is tried first; if it returns None, a leading run of digits and
    commas (after removing all whitespace) is converted with str_to_int instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed
    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(
        self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width'; entries without a valid URL are dropped
    """
    found = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            link = url_or_none(entry.get('url'))
            if link:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in link:
                    link = link.partition('?')[0]
                found.append({
                    'url': link,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return found

@staticmethod

def extract_relative_time(relative_time_text):

741

"""

742

Extracts a relative time from string and converts to dt object

743

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

"""

if mobj:

start = mobj.group('start')

748

if start:

749

return datetime_from_str(start)

750

try:

751

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _extract_time_text(self, renderer, *path_list):
    """@returns (timestamp, time_text)

    The text is first read as a relative time ("6 days ago"); failing that, as an
    absolute date, either directly or from a date-like part of the lower-cased text.
    A warning is reported once when non-empty text cannot be parsed at all.
    """
    text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(text)
    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_part = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text

773

774

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API with retries and return the response.

    Retried (up to the 'extractor_retries' param, default 3): network errors
    other than HTTP 403/429, alerts containing 'unknown error', and responses
    holding none of `check_get_keys`.
    @param check_get_keys  keys of which at least one must be present (and truthy)
                           in the response; None or empty disables the check
    @param fatal           when false, failures are reported as warnings and None is returned
    """
    max_retries = self.get_param('extractor_retries', 3)
    required_keys = [] if check_get_keys is None else check_get_keys
    response = None
    pending_error = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        if pending_error:
            self.report_warning('%s. Retrying ...' % remove_end(pending_error, '.'))
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as e:
            cause = e.cause
            if isinstance(cause, network_exceptions):
                is_http_error = isinstance(cause, compat_HTTPError)
                if is_http_error:
                    first_bytes = cause.read(512)
                    if not is_html(first_bytes):
                        # A non-HTML error body may carry YouTube's own error message
                        error_body = self._webpage_read_content(
                            cause, None, item_id, prefix=first_bytes) or '{}'
                        yt_error = try_get(
                            self._parse_json(error_body, item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not is_http_error or cause.code not in (403, 429):
                    pending_error = error_to_compat_str(cause or e.msg)
                    if attempt < max_retries:
                        continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return
        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                if 'unknown error' in e.msg.lower():
                    pending_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not required_keys or dict_get(response, required_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            pending_error = 'Incomplete data received'
            if attempt >= max_retries:
                if fatal:
                    raise ExtractorError(pending_error)
                self.report_warning(pending_error)
                return
    return response

@staticmethod

def is_music_url(url):

848

return re.match(r'https?://music\.youtube\.com/', url) is not None

849

850

def _extract_video(self, renderer):
    """Build a 'url'-type result (to be handled by YoutubeIE) from a video renderer dict.

    The URL points to /shorts/ when the renderer is marked as a short, otherwise to /watch.
    'upload_date' is filled only when the 'approximate_date' extractor argument
    of youtubetab is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration spelled out in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    url = (f'https://www.youtube.com/shorts/{video_id}' if is_short
           else f'https://www.youtube.com/watch?v={video_id}')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif (overlay_style is not None and overlay_style == 'LIVE') or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
                        if self._configuration_arg('approximate_date', ie_key='youtubetab')
                        else None),
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

907

IE_DESC = 'YouTube'

908

_VALID_URL = r"""(?x)^

909

(

910

(?:https?://|//) # http(s):// or protocol-independent URL

911

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

912

(?:www\.)?deturl\.com/www\.youtube\.com|

913

(?:www\.)?pwnyoutube\.com|

914

(?:www\.)?hooktube\.com|

915

(?:www\.)?yourepeat\.com|

916

tube\.majestyc\.net|

917

%(invidious)s|

918

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

919

(?:.*?\#/)? # handle anchor (#/) redirect urls

920

(?: # the various things that can precede the ID:

921

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

922

|(?: # or the v= param in all its forms

923

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

924

(?:\?|\#!?) # the params delimiter ? or # or #!

925

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

931

vid\.plus| # or vid.plus/xxxx

932

zwearz\.com/watch| # or zwearz.com/watch/xxxx

933

%(invidious)s

934

)/

935

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

936

)

937

)? # all until now is optional -> you can pass the naked ID

938

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

939

(?(1).+)? # if we found the ID, everything can follow

940

(?:\#|$)""" % {

941

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

942

}

943

_PLAYER_INFO_RE = (

944

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

945

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

946

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

947

)

948

_formats = {

949

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

950

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

951

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

952

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

953

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

954

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

955

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

956

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

957

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

958

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

959

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

960

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

961

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

962

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

963

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

964

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

965

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

966

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

971

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

972

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

973

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

974

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

975

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

976

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

977

978

# Apple HTTP Live Streaming

979

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

980

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

981

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

982

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

983

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

984

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

985

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

986

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

987

988

# DASH mp4 video

989

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

990

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

991

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

992

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

993

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

994

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

995

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

996

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

997

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

998

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

999

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

1000

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

1001

1002

# Dash mp4 audio

1003

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

1004

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

1005

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

1006

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1007

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1008

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

1009

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

1010

1011

# Dash webm

1012

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1013

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1014

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1015

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1016

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1017

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1018

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1019

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1020

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1021

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1022

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1023

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1024

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1025

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1026

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1027

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1028

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1029

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1030

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1031

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1032

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1033

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1034

1035

# Dash webm audio

1036

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1037

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1038

1039

# Dash webm audio with opus inside

1040

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1041

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1042

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1043

1044

# RTMP (unnamed)

1045

'_rtmp': {'protocol': 'rtmp'},

1046

1047

# av01 video only formats sometimes served with "unknown" codecs

1048

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1049

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1050

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1051

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1052

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1053

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1054

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1055

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1056

}

1057

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1069

'uploader': 'Philipp Hagemeister',

1070

'uploader_id': 'phihag',

1071

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1072

'channel': 'Philipp Hagemeister',

1073

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1074

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1075

'upload_date': '20121002',

1076

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1077

'categories': ['Science & Technology'],

1078

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1083

'playable_in_embed': True,

1084

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1085

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1094

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1099

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1100

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1101

'uploader': 'SET India',

1102

'uploader_id': 'setindia',

1103

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1104

'age_limit': 18,

1105

},

1106

'skip': 'Private video',

1107

},

1108

{

1109

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1110

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1115

'uploader': 'Philipp Hagemeister',

1116

'uploader_id': 'phihag',

1117

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1118

'channel': 'Philipp Hagemeister',

1119

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1120

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1121

'upload_date': '20121002',

1122

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1123

'categories': ['Science & Technology'],

1124

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1129

'playable_in_embed': True,

1130

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1131

'live_status': 'not_live',

1132

'age_limit': 0,

1133

'channel_follower_count': int

1134

},

1135

'params': {

1136

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1141

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1146

'uploader_id': '8KVIDEO',

1147

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1148

'description': '',

1149

'uploader': '8KVIDEO',

1150

'title': 'UHDTV TEST 8K VIDEO.mp4'

1151

},

1152

'params': {

1153

'youtube_include_dash_manifest': True,

1154

'format': '141',

1155

},

1156

'skip': 'format 141 not served anymore',

1157

},

1158

# DASH manifest with encrypted signature

1159

{

1160

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1165

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1166

'duration': 244,

1167

'uploader': 'AfrojackVEVO',

1168

'uploader_id': 'AfrojackVEVO',

1169

'upload_date': '20131011',

1170

'abr': 129.495,

1171

'like_count': int,

1172

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1173

'playable_in_embed': True,

1174

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1175

'view_count': int,

1176

'track': 'The Spark',

1177

'live_status': 'not_live',

1178

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1179

'channel': 'Afrojack',

1180

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1181

'tags': 'count:19',

1182

'availability': 'public',

1183

'categories': ['Music'],

1184

'age_limit': 0,

1185

'alt_title': 'The Spark',

1186

'channel_follower_count': int

1187

},

1188

'params': {

1189

'youtube_include_dash_manifest': True,

1190

'format': '141/bestaudio[ext=m4a]',

1191

},

1192

},

1193

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1194

{

1195

'note': 'Embed allowed age-gate video',

1196

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1201

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1202

'duration': 142,

1203

'uploader': 'The Witcher',

1204

'uploader_id': 'WitcherGame',

1205

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1206

'upload_date': '20140605',

1207

'age_limit': 18,

1208

'categories': ['Gaming'],

1209

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1210

'availability': 'needs_auth',

1211

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1212

'like_count': int,

1213

'channel': 'The Witcher',

1214

'live_status': 'not_live',

1215

'tags': 'count:17',

1216

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1217

'playable_in_embed': True,

1218

'view_count': int,

1219

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1224

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1229

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1230

'upload_date': '20200408',

1231

'uploader_id': 'FlyingKitty900',

1232

'uploader': 'FlyingKitty',

1233

'age_limit': 18,

1234

'availability': 'needs_auth',

1235

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1236

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1237

'channel': 'FlyingKitty',

1238

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1239

'view_count': int,

1240

'categories': ['Entertainment'],

1241

'live_status': 'not_live',

1242

'tags': ['Flyingkitty', 'godzilla 2'],

1243

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1244

'like_count': int,

1245

'duration': 177,

1246

'playable_in_embed': True,

1247

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1252

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1253

'info_dict': {

1254

'id': 'Tq92D6wQ1mg',

1255

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1256

'ext': 'mp4',

1257

'upload_date': '20191228',

1258

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1259

'uploader': 'Projekt Melody',

1260

'description': 'md5:17eccca93a786d51bc67646756894066',

1261

'age_limit': 18,

1262

'like_count': int,

1263

'availability': 'needs_auth',

1264

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1265

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1266

'view_count': int,

1267

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1268

'channel': 'Projekt Melody',

1269

'live_status': 'not_live',

1270

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1271

'playable_in_embed': True,

1272

'categories': ['Entertainment'],

1273

'duration': 106,

1274

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1275

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1280

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1285

'uploader': 'Herr Lurik',

1286

'uploader_id': 'st3in234',

1287

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1288

'upload_date': '20130730',

1289

'track': 'Such mich find mich',

1290

'age_limit': 0,

1291

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1292

'like_count': int,

1293

'playable_in_embed': False,

1294

'creator': 'OOMPH!',

1295

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1296

'view_count': int,

1297

'alt_title': 'Such mich find mich',

1298

'duration': 210,

1299

'channel': 'Herr Lurik',

1300

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1301

'categories': ['Music'],

1302

'availability': 'public',

1303

'uploader_url': 'http://www.youtube.com/user/st3in234',

1304

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1305

'live_status': 'not_live',

1306

'artist': 'OOMPH!',

1307

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1312

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1313

'only_matching': True,

1314

},

1315

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1316

# YouTube Red ad is not captured for creator

1317

{

1318

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1324

'uploader_id': 'deadmau5',

1325

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1326

'creator': 'deadmau5',

1327

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1328

'uploader': 'deadmau5',

1329

'title': 'Deadmau5 - Some Chords (HD)',

1330

'alt_title': 'Some Chords',

1331

'availability': 'public',

1332

'tags': 'count:14',

1333

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1334

'view_count': int,

1335

'live_status': 'not_live',

1336

'channel': 'deadmau5',

1337

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1338

'like_count': int,

1339

'track': 'Some Chords',

1340

'artist': 'deadmau5',

1341

'playable_in_embed': True,

1342

'age_limit': 0,

1343

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1344

'categories': ['Music'],

1345

'album': 'Some Chords',

1346

'channel_follower_count': int

1347

},

1348

'expected_warnings': [

1349

'DASH manifest missing',

1350

]

1351

},

1352

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1353

{

1354

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1360

'uploader_id': 'olympic',

1361

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1362

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1363

'uploader': 'Olympics',

1364

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1365

'like_count': int,

1366

'release_timestamp': 1343767800,

1367

'playable_in_embed': True,

1368

'categories': ['Sports'],

1369

'release_date': '20120731',

1370

'channel': 'Olympics',

1371

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1372

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1373

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1374

'age_limit': 0,

1375

'availability': 'public',

1376

'live_status': 'was_live',

1377

'view_count': int,

1378

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1379

'channel_follower_count': int

1380

},

1381

'params': {

1382

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1392

'duration': 85,

1393

'upload_date': '20110310',

1394

'uploader_id': 'AllenMeow',

1395

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1396

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1397

'uploader': '孫ᄋᄅ',

1398

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1399

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1404

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1405

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1406

'view_count': int,

1407

'categories': ['People & Blogs'],

1408

'like_count': int,

1409

'live_status': 'not_live',

1410

'availability': 'unlisted',

1411

'channel_follower_count': int

1412

},

1413

},

1414

# url_encoded_fmt_stream_map is empty string

1415

{

1416

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1421

'description': '',

1422

'upload_date': '20150404',

1423

'uploader_id': 'spbelect',

1424

'uploader': 'Наблюдатели Петербурга',

1425

},

1426

'params': {

1427

'skip_download': 'requires avconv',

1428

},

1429

'skip': 'This live event has ended.',

1430

},

1431

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1432

{

1433

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1438

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1439

'duration': 220,

1440

'upload_date': '20150625',

1441

'uploader_id': 'dorappi2000',

1442

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1443

'uploader': 'dorappi2000',

1444

'formats': 'mincount:31',

1445

},

1446

'skip': 'not actual anymore',

1447

},

1448

# DASH manifest with segment_list

1449

{

1450

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1451

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1456

'uploader': 'Airtek',

1457

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1458

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1459

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1460

},

1461

'params': {

1462

'youtube_include_dash_manifest': True,

1463

'format': '135', # bestvideo

1464

},

1465

'skip': 'This live event has ended.',

1466

},

1467

{

1468

# Multifeed videos (multiple cameras), URL is for Main Camera

1469

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1470

'info_dict': {

1471

'id': 'jvGDaLqkpTg',

1472

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1473

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1480

'description': 'md5:e03b909557865076822aa169218d6a5d',

1481

'duration': 10643,

1482

'upload_date': '20161111',

1483

'uploader': 'Team PGP',

1484

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1485

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1492

'description': 'md5:e03b909557865076822aa169218d6a5d',

1493

'duration': 10991,

1494

'upload_date': '20161111',

1495

'uploader': 'Team PGP',

1496

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1497

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1504

'description': 'md5:e03b909557865076822aa169218d6a5d',

1505

'duration': 10995,

1506

'upload_date': '20161111',

1507

'uploader': 'Team PGP',

1508

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1509

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1516

'description': 'md5:e03b909557865076822aa169218d6a5d',

1517

'duration': 10990,

1518

'upload_date': '20161111',

1519

'uploader': 'Team PGP',

1520

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1521

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1526

},

1527

'skip': 'Not multifeed anymore',

1528

},

1529

{

1530

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1531

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1532

'info_dict': {

1533

'id': 'gVfLd0zydlo',

1534

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1535

},

1536

'playlist_count': 2,

1537

'skip': 'Not multifeed anymore',

1538

},

1539

{

1540

'url': 'https://vid.plus/FlRa-iH7PGw',

1541

'only_matching': True,

1542

},

1543

{

1544

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1545

'only_matching': True,

1546

},

1547

{

1548

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1549

# Also tests cut-off URL expansion in video description (see

1550

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1551

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1552

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1557

'alt_title': 'Dark Walk',

1558

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1559

'duration': 133,

1560

'upload_date': '20151119',

1561

'uploader_id': 'IronSoulElf',

1562

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1563

'uploader': 'IronSoulElf',

1564

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1565

'track': 'Dark Walk',

1566

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1567

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1568

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1569

'categories': ['Film & Animation'],

1570

'view_count': int,

1571

'live_status': 'not_live',

1572

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1573

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1574

'tags': 'count:13',

1575

'availability': 'public',

1576

'channel': 'IronSoulElf',

1577

'playable_in_embed': True,

1578

'like_count': int,

1579

'age_limit': 0,

1580

'channel_follower_count': int

1581

},

1582

'params': {

1583

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1588

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1589

'only_matching': True,

1590

},

1591

{

1592

# Video with yt:stretch=17:0

1593

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1598

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1599

'upload_date': '20151107',

1600

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1601

'uploader': 'CH GAMER DROID',

1602

},

1603

'params': {

1604

'skip_download': True,

1605

},

1606

'skip': 'This video does not exist.',

1607

},

1608

{

1609

# Video with incomplete 'yt:stretch=16:'

1610

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1611

'only_matching': True,

1612

},

1613

{

1614

# Video licensed under Creative Commons

1615

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1620

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1621

'duration': 721,

1622

'upload_date': '20150128',

1623

'uploader_id': 'BerkmanCenter',

1624

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1625

'uploader': 'The Berkman Klein Center for Internet & Society',

1626

'license': 'Creative Commons Attribution license (reuse allowed)',

1627

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1628

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1629

'like_count': int,

1630

'age_limit': 0,

1631

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1632

'channel': 'The Berkman Klein Center for Internet & Society',

1633

'availability': 'public',

1634

'view_count': int,

1635

'categories': ['Education'],

1636

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1637

'live_status': 'not_live',

1638

'playable_in_embed': True,

1639

'channel_follower_count': int

1640

},

1641

'params': {

1642

'skip_download': True,

},

},

{

# Channel-like uploader_url

1647

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1652

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1653

'duration': 4060,

1654

'upload_date': '20151120',

1655

'uploader': 'Bernie Sanders',

1656

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1657

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1658

'license': 'Creative Commons Attribution license (reuse allowed)',

1659

'playable_in_embed': True,

1660

'tags': 'count:12',

1661

'like_count': int,

1662

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1663

'age_limit': 0,

1664

'availability': 'public',

1665

'categories': ['News & Politics'],

1666

'channel': 'Bernie Sanders',

1667

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1668

'view_count': int,

1669

'live_status': 'not_live',

1670

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1671

'channel_follower_count': int

1672

},

1673

'params': {

1674

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1679

'only_matching': True,

1680

},

1681

{

1682

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1683

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1684

'only_matching': True,

1685

},

1686

{

1687

# Rental video preview

1688

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1693

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1694

'upload_date': '20150811',

1695

'uploader': 'FlixMatrix',

1696

'uploader_id': 'FlixMatrixKaravan',

1697

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1698

'license': 'Standard YouTube License',

1699

},

1700

'params': {

1701

'skip_download': True,

1702

},

1703

'skip': 'This video is not available.',

1704

},

1705

{

1706

# YouTube Red video with episode data

1707

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1712

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1713

'duration': 2085,

1714

'upload_date': '20170118',

1715

'uploader': 'Vsauce',

1716

'uploader_id': 'Vsauce',

1717

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1718

'series': 'Mind Field',

1719

'season_number': 1,

1720

'episode_number': 1,

1721

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1722

'tags': 'count:12',

1723

'view_count': int,

1724

'availability': 'public',

1725

'age_limit': 0,

1726

'channel': 'Vsauce',

1727

'episode': 'Episode 1',

1728

'categories': ['Entertainment'],

1729

'season': 'Season 1',

1730

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1731

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1732

'like_count': int,

1733

'playable_in_embed': True,

1734

'live_status': 'not_live',

1735

'channel_follower_count': int

1736

},

1737

'params': {

1738

'skip_download': True,

1739

},

1740

'expected_warnings': [

1741

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1746

# as inappropriate or offensive to some audiences.

1747

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1752

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1753

'duration': 965,

1754

'upload_date': '20140124',

1755

'uploader': 'New Century Foundation',

1756

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1757

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1758

},

1759

'params': {

1760

'skip_download': True,

1761

},

1762

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1767

'only_matching': True,

1768

},

1769

{

1770

# geo restricted to JP

1771

'url': 'sJL6WA-aGkQ',

1772

'only_matching': True,

1773

},

1774

{

1775

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1776

'only_matching': True,

1777

},

1778

{

1779

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1780

'only_matching': True,

1781

},

1782

{

1783

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1784

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1785

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1790

'only_matching': True,

1791

},

1792

{

1793

# Video with unsupported adaptive stream type formats

1794

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1799

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1800

'duration': 433,

1801

'upload_date': '20130923',

1802

'uploader': 'Amelia Putri Harwita',

1803

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1804

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1805

'formats': 'maxcount:10',

1806

},

1807

'params': {

1808

'skip_download': True,

1809

'youtube_include_dash_manifest': False,

1810

},

1811

'skip': 'not actual anymore',

1812

},

1813

{

1814

# Youtube Music Auto-generated description

1815

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1820

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1821

'upload_date': '20190312',

1822

'uploader': 'Stephen - Topic',

1823

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1824

'artist': 'Stephen',

1825

'track': 'Voyeur Girl',

1826

'album': 'it\'s too much love to know my dear',

1827

'release_date': '20190313',

1828

'release_year': 2019,

1829

'alt_title': 'Voyeur Girl',

1830

'view_count': int,

1831

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1832

'playable_in_embed': True,

1833

'like_count': int,

1834

'categories': ['Music'],

1835

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1836

'channel': 'Stephen',

1837

'availability': 'public',

1838

'creator': 'Stephen',

1839

'duration': 169,

1840

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1841

'age_limit': 0,

1842

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1843

'tags': 'count:11',

1844

'live_status': 'not_live',

1845

'channel_follower_count': int

1846

},

1847

'params': {

1848

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1853

'only_matching': True,

1854

},

1855

{

1856

# invalid -> valid video id redirection

1857

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1862

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1863

'upload_date': '20090125',

1864

'uploader': 'Prochorowka',

1865

'uploader_id': 'Prochorowka',

1866

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1867

'artist': 'Panjabi MC',

1868

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1869

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1870

},

1871

'params': {

1872

'skip_download': True,

1873

},

1874

'skip': 'Video unavailable',

1875

},

1876

{

1877

# empty description results in an empty string

1878

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1885

'uploader_id': 'ElevageOrVert',

1886

'uploader': 'ElevageOrVert',

1887

'view_count': int,

1888

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1889

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1890

'like_count': int,

1891

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1892

'tags': [],

1893

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1894

'availability': 'public',

1895

'age_limit': 0,

1896

'categories': ['Pets & Animals'],

1897

'duration': 7,

1898

'playable_in_embed': True,

1899

'live_status': 'not_live',

1900

'channel': 'ElevageOrVert',

1901

'channel_follower_count': int

1902

},

1903

'params': {

1904

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1909

# see [2] for an example with '};' inside ytInitialPlayerResponse

1910

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1911

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1912

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1917

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1918

'upload_date': '20130831',

1919

'uploader_id': 'kudvenkat',

1920

'uploader': 'kudvenkat',

1921

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1922

'like_count': int,

1923

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1924

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1925

'live_status': 'not_live',

1926

'categories': ['Education'],

1927

'availability': 'public',

1928

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1929

'tags': 'count:12',

1930

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1935

'channel_follower_count': int

1936

},

1937

'params': {

1938

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1943

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1944

'only_matching': True,

1945

},

1946

{

1947

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1948

'only_matching': True,

1949

},

1950

{

1951

# https://github.com/ytdl-org/youtube-dl/pull/28094

1952

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1958

'upload_date': '20141120',

1959

'uploader': 'The Cinematic Orchestra - Topic',

1960

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1961

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1962

'artist': 'The Cinematic Orchestra',

1963

'track': 'Burn Out',

1964

'album': 'Every Day',

1965

'like_count': int,

1966

'live_status': 'not_live',

1967

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1972

'creator': 'The Cinematic Orchestra',

1973

'channel': 'The Cinematic Orchestra',

1974

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1975

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1976

'availability': 'public',

1977

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1978

'categories': ['Music'],

1979

'playable_in_embed': True,

1980

'channel_follower_count': int

1981

},

1982

'params': {

1983

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1988

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1989

'only_matching': True,

1990

},

1991

{

1992

# controversial video, requires bpctr/contentCheckOk

1993

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1998

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1999

'uploader': 'CBS Mornings',

2000

'uploader_id': 'CBSThisMorning',

2001

'upload_date': '20140716',

2002

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

2003

'duration': 170,

2004

'categories': ['News & Politics'],

2005

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2006

'view_count': int,

2007

'channel': 'CBS Mornings',

2008

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2009

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2010

'age_limit': 18,

2011

'availability': 'needs_auth',

2012

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2013

'like_count': int,

2014

'live_status': 'not_live',

2015

'playable_in_embed': True,

2016

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2021

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2026

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2027

'upload_date': '20201120',

2028

'uploader': 'Walk around Japan',

2029

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2030

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2031

'duration': 1456,

2032

'categories': ['Travel & Events'],

2033

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2034

'view_count': int,

2035

'channel': 'Walk around Japan',

2036

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2037

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2038

'age_limit': 0,

2039

'availability': 'public',

2040

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2041

'live_status': 'not_live',

2042

'playable_in_embed': True,

2043

'channel_follower_count': int

2044

},

2045

'params': {

2046

'skip_download': True,

2047

},

2048

}, {

2049

# Has multiple audio streams

2050

'url': 'WaOKSUlf4TM',

2051

'only_matching': True

2052

}, {

2053

# Requires Premium: has format 141 when requested using YTM url

2054

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2055

'only_matching': True

2056

}, {

2057

# multiple subtitles with same lang_code

2058

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2059

'only_matching': True,

2060

}, {

2061

# Force use android client fallback

2062

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2063

'info_dict': {

2064

'id': 'YOelRv7fMxY',

2065

'title': 'DIGGING A SECRET TUNNEL Part 1',

2066

'ext': '3gp',

2067

'upload_date': '20210624',

2068

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2069

'uploader': 'colinfurze',

2070

'uploader_id': 'colinfurze',

2071

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2072

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2073

'duration': 596,

2074

'categories': ['Entertainment'],

2075

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2076

'view_count': int,

2077

'channel': 'colinfurze',

2078

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2079

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2080

'age_limit': 0,

2081

'availability': 'public',

2082

'like_count': int,

2083

'live_status': 'not_live',

2084

'playable_in_embed': True,

2085

'channel_follower_count': int

2086

},

2087

'params': {

2088

'format': '17', # 3gp format available on android

2089

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2094

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2095

'only_matching': True,

2096

'params': {

2097

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2102

'only_matching': True,

2103

}, {

2104

'note': 'Storyboards',

2105

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2111

'uploader_id': 'scishow',

2112

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2113

'upload_date': '20140324',

2114

'uploader': 'SciShow',

2115

'like_count': int,

2116

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2117

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2118

'view_count': int,

2119

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2120

'playable_in_embed': True,

2121

'tags': 'count:12',

2122

'uploader_url': 'http://www.youtube.com/user/scishow',

2123

'availability': 'public',

2124

'channel': 'SciShow',

2125

'live_status': 'not_live',

2126

'duration': 248,

2127

'categories': ['Education'],

2128

'age_limit': 0,

2129

'channel_follower_count': int

2130

}, 'params': {'format': 'mhtml', 'skip_download': True}

2131

}, {

2132

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2133

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2138

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2139

'uploader': 'Leon Nguyen',

2140

'uploader_id': 'VNSXIII',

2141

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2142

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2143

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2148

'tags': 'count:23',

2149

'playable_in_embed': True,

2150

'live_status': 'not_live',

2151

'upload_date': '20220103',

2152

'like_count': int,

2153

'availability': 'public',

2154

'channel': 'Leon Nguyen',

2155

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2156

'channel_follower_count': int

2157

}

2158

}, {

2159

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2160

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2165

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2166

'uploader': 'Quackity',

2167

'uploader_id': 'QuackityHQ',

2168

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2169

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2170

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2175

'tags': 'count:26',

2176

'playable_in_embed': True,

2177

'live_status': 'not_live',

2178

'release_timestamp': 1641172509,

2179

'release_date': '20220103',

2180

'upload_date': '20220103',

2181

'like_count': int,

2182

'availability': 'public',

2183

'channel': 'Quackity',

2184

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2185

'channel_follower_count': int

2186

}

2187

},

2188

{ # continuous livestream. Microformat upload date should be preferred.

2189

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2190

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2191

'info_dict': {

2192

'id': 'kgx4WGK0oNU',

2193

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2194

'ext': 'mp4',

2195

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2196

'availability': 'public',

2197

'age_limit': 0,

2198

'release_timestamp': 1637975704,

2199

'upload_date': '20210619',

2200

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2201

'live_status': 'is_live',

2202

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2203

'uploader': '阿鲍Abao',

2204

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2205

'channel': 'Abao in Tokyo',

2206

'channel_follower_count': int,

2207

'release_date': '20211127',

2208

'tags': 'count:39',

2209

'categories': ['People & Blogs'],

2210

'like_count': int,

2211

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2212

'view_count': int,

2213

'playable_in_embed': True,

2214

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2215

},

2216

'params': {'skip_download': True}

2217

}, {

2218

# Story. Requires specific player params to work.

2219

# Note: stories get removed after some period of time

2220

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2225

'view_count': int,

2226

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2227

'upload_date': '20220526',

2228

'categories': ['Education'],

2229

'title': 'Story',

2230

'channel': 'IT\'S HISTORY',

2231

'description': '',

2232

'uploader_id': 'BlastfromthePast',

2233

'duration': 12,

2234

'uploader': 'IT\'S HISTORY',

2235

'playable_in_embed': True,

2236

'age_limit': 0,

2237

'live_status': 'not_live',

2238

'tags': [],

2239

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2240

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2241

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2242

}

2243

}, {

2244

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2249

'upload_date': '20220323',

2250

'like_count': int,

2251

'availability': 'unlisted',

2252

'channel': 'nao20010128nao',

2253

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2254

'age_limit': 0,

2255

'uploader': 'nao20010128nao',

2256

'uploader_id': 'nao20010128nao',

2257

'categories': ['Music'],

2258

'view_count': int,

2259

'description': '',

2260

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2261

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2262

'live_status': 'not_live',

2263

'playable_in_embed': True,

2264

'channel_follower_count': int,

2265

'duration': 6,

2266

'tags': [],

2267

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

}

}

]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs whose query string carries a non-empty ``list`` parameter are
    rejected here; every other URL is delegated to the parent class.
    """
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super().suitable(url)

2280

2281

def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache is keyed by player id (see _load_player);
    # _player_cache holds extracted signature/nsig functions and decrypted nsig values.
    self._code_cache, self._player_cache = {}, {}

2285

2286

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Convert every ``is_from_start`` format into a lazily generated live DASH download.

    Each format dict flagged ``is_from_start`` is mutated in place: it is
    marked live, its protocol becomes ``http_dash_segments_generator`` and
    its ``fragments`` entry becomes a ``functools.partial`` of
    ``_live_dash_fragments`` bound to the format id, *live_start_time* and
    a manifest-refresh callback (``mpd_feed``).
    """
    manifest_lock = threading.Lock()

    is_live = True
    start_time = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses only once more than `delay`
        # seconds have passed since the previous fetch.
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        # Only the refresh itself is serialised; the lookup below runs unlocked.
        with manifest_lock:
            refetch_manifest(format_id, delay)

        for candidate in formats:
            if candidate['format_id'] == format_id:
                return candidate['manifest_url'], candidate['manifest_stream_number'], is_live

        if is_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt['is_live'] = True
        fmt['protocol'] = 'http_dash_segments_generator'
        fmt['fragments'] = functools.partial(
            self._live_dash_fragments, fmt['format_id'], live_start_time, mpd_feed)

2329

2330

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """Generate fragment dicts (``{'url': ...}``) for a live DASH stream.

    The generator keeps polling for the newest sequence number, either
    from the ``X-Head-Seqnum`` header of the last yielded segment or from
    the MPD manifest, and yields every not-yet-emitted segment URL.

    @param format_id        format id passed through to ``mpd_feed``
    @param live_start_time  start time of the stream (may be falsy)
    @param mpd_feed         callable ``(format_id, delay)`` returning
                            ``(manifest_url, manifest_stream_number, is_live)`` or None
    @param ctx              mapping; ``ctx.get('start')`` and
                            ``ctx.pop('last_error', None)`` are read from it
    """
    # FETCH_SPAN: minimum seconds between two polling rounds.
    # MAX_DURATION: 432000 s == 120 hours, the window mentioned in the warning below.
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # The nested helper returns the enclosing `last_seq` on its failure paths.
    # Bind it up front so that a failed *first* manifest fetch yields
    # (False, None) instead of raising NameError for an unbound closure variable.
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # A 403 on the previous request (or an explicit `immediate`) shortens the refetch delay
        expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        # Give up once too many consecutive rounds produced nothing new
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # The exception is only a vehicle to leave the for loop and
                    # restart the while loop via the handler below
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2430

2431

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute JS player URL taken from the given ytcfg objects, or None.

    The URL is looked up under ``PLAYER_JS_URL`` or
    ``WEB_PLAYER_CONTEXT_CONFIGS/*/jsUrl`` and joined onto
    ``https://www.youtube.com``. *webpage* is accepted but not used here.
    """
    lookup_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    player_url = traverse_obj(ytcfgs, *lookup_paths, get_all=False, expected_type=compat_str)
    return urljoin('https://www.youtube.com', player_url) if player_url else None

2438

2439

def _download_player_url(self, video_id, fatal=False):
    """Build the ``base.js`` player URL from the version found in the iframe API script.

    Returns None when the download yields nothing or no player version
    can be found in it.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2448

2449

def _signature_cache_id(self, example_sig):

2450

""" Return a string representation of a signature """

2451

return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))

2452

2453

@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern found in *player_url*.

    Raises ExtractorError when none of the patterns matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        found = re.search(pattern, player_url)
        if found:
            return found.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)

2462

2463

def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS code for *player_url*, downloading it at most once per player id.

    Successfully downloaded code is stored in ``self._code_cache``; when
    the download returns nothing, nothing is cached and None is returned.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache.get(player_id)

    downloaded = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if downloaded:
        self._code_cache[player_id] = downloaded
    return self._code_cache.get(player_id)

2473

2474

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that descrambles signatures shaped like *example_sig*.

    A cached index list from the ``youtube-sigfuncs`` cache is used when
    present; otherwise the function is parsed out of the player code and
    its index list is stored for later runs. Returns None when the player
    code could not be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    cached_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        return lambda s: ''.join(s[i] for i in cached_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None

    sig_func = self._parse_sig_js(code)

    # Feed a string whose characters are their own indices, so the output
    # reveals which input position ends up at every output position.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func

2495

2496

def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to *func* when ``youtube_print_sig_code`` is set.

    *func* is run on a probe string whose characters equal their indices;
    the resulting index list is rendered as a sum of ``s[...]`` index and
    slice expressions and written with ``self.to_screen``.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def _genslice(start, end, step):
        starts = '' if start == 0 else str(start)
        ends = (':%d' % (end + step)) if end + step >= 0 else ':'
        steps = '' if step == 1 else (':%d' % step)
        return f's[{starts}{ends}{steps}]'

    def gen_sig_code(idxs):
        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            delta = i - prev
            if step is not None:
                # Inside a run: either it continues, or it ends and is emitted
                if delta != step:
                    yield _genslice(start, prev, step)
                    step = None
                continue
            if delta in [-1, 1]:
                step, start = delta, prev
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % (part_lengths)
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

2537

2538

def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return a Python callable for it.

    The function name is searched for with the patterns below (captured
    in the ``sig`` group); the function is then extracted with
    JSInterpreter and wrapped so it takes the signature string directly.
    """
    # NOTE: literal parentheses in these patterns must be written as `\(`
    # and `\)`. A bare `$` in the middle of a pattern is an end-of-string
    # anchor, which makes the pattern unable to ever match.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes an argument list, hence the one-element list
    return lambda s: initial_function([s])

2561

2562

def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    try:
        # One function is cached per (player URL, signature shape) pair
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(video_id, player_url, s)
        decrypt = self._player_cache[cache_key]
        self._print_sig_code(decrypt, s)
        return decrypt(s)
    except Exception as e:
        # Any failure is re-raised as an ExtractorError carrying the traceback text
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2574

2575

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    # Results are cached per input value
    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        # The extracted n function is cached per player URL
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        n_func = self._player_cache[func_key]
        self._player_cache[value_key] = n_func(s)
        self.write_debug(f'Decrypted nsig {s} => {self._player_cache[value_key]}')
        return self._player_cache[value_key]
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2595

2596

def _extract_n_function_name(self, jscode):
    """Return the name of the JS function applied to the ``n`` parameter.

    The player code is searched for ``.get("n"))&&(b=NAME(x)`` or
    ``.get("n"))&&(b=NAME[IDX](x)``. In the indexed form, ``NAME`` is an
    array declared as ``var NAME = [...];`` and the function name is its
    element number ``IDX``.
    """
    # Literal parentheses must be escaped as `\(` / `\)`; a bare `$` here
    # would be an end-of-string anchor and the pattern could never match.
    nfunc, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return nfunc
    # Resolve the array literal and pick the referenced element
    return json.loads(js_to_json(self._search_regex(
        rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2605

2606

def _extract_n_function(self, video_id, player_url):
    """Return a callable applying the player's n function to a single string.

    The function code is taken from the ``youtube-nsig`` cache when
    available; otherwise it is extracted from the player JS and stored
    in that cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def decrypt_n(s):
        # The JS function is rebuilt from its code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return decrypt_n

2623

2624

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value is read from ``ytcfg['STS']`` when available, otherwise it
    is searched for in the player code. Without a player URL this either
    raises (``fatal``) or warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))

2647

2648

def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL of the player responses to mark the video watched.

    A warning is reported when no tracking URL is present; the request
    itself is non-fatal.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=encoded_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2673

2674

@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in *webpage*.

    Three sources are scanned in order: embedded player URLs, lazyYT
    ``data-youtube-id`` attributes and ``data-video_id`` attributes of the
    Wordpress "YouTube Video Importer" plugin. The result is a list of
    URLs / video ids in that order.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = [unescapeHTML(mobj.group('url')) for mobj in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(lazy_id)
        for lazy_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the video id is its last group
    entries.extend(groups[-1] for groups in importer_matches)

    return entries

@staticmethod
def _extract_url(webpage):
    """Return the first entry produced by ``YoutubeIE._extract_urls``, or None when there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]

2710

2711

@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched (in verbose mode) against *url*.

    Raises ExtractorError when *url* does not match.
    """
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    if matched is not None:
        return matched.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2717

2718

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in *data*.

    Start times come from ``timeRangeStartMillis`` (scaled by 1000) and
    titles from ``title.simpleText`` of each ``chapterRenderer``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_of(chapter):
        start_ms = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_ms, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_of, chapter_title=title_of, duration=duration)

2732

2733

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first engagement panel that yields any, else ``[]``.

    Every ``macroMarkersListRenderer`` contents list is tried in order;
    start times are parsed from ``timeDescription`` and titles are read
    from ``title``.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2745

2746

def _extract_chapters_from_description(self, description, duration):
    """Build chapters from ``[H:]MM:SS title`` lines found in *description*.

    Matching lines are handed to ``_extract_chapters`` in non-strict mode;
    a missing description is treated as an empty string.
    """
    timestamped_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')

    def chapter_time(found):
        return parse_duration(found[0])

    def chapter_title(found):
        return found[1]

    return self._extract_chapters(
        timestamped_lines, chapter_time=chapter_time, chapter_title=chapter_title,
        duration=duration, strict=False)

2751

2752

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

2757

'title': chapter_title(chapter),

2758

} for chapter in chapter_list or []]

2759

if not strict:

2760

chapter_list.sort(key=lambda c: c['start_time'] or 0)

2761

2762

chapters = [{'start_time': 0, 'title': '<Untitled>'}]

2763

for idx, chapter in enumerate(chapter_list):

2764

if chapter['start_time'] is None or not chapter['title']:

2765

self.report_warning(f'Incomplete chapter {idx}')

2766

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

2767

chapters[-1]['end_time'] = chapter['start_time']

2768

chapters.append(chapter)

2769

else:

2770

self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')

2771

chapters[-1]['end_time'] = duration

2772

return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:]

2773

2774

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    @param comment_renderer  The comment renderer dict
    @param parent            ID of the parent comment; falsy for a top-level
                             comment, which is reported as 'root'

    Returns the comment dict, or None if the renderer has no `commentId`.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    body = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    published_ts, published_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author_name = self._get_text(comment_renderer, 'authorText')
    channel_id = try_get(
        comment_renderer,
        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    # Two possible locations for the like count; the first usable string is taken
    raw_votes = try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']),
        compat_str)
    like_count = parse_count(raw_votes) or 0

    avatar_url = try_get(
        comment_renderer,
        lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
    by_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)

    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    hearted = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': body,
        'timestamp': published_ts,
        'time_text': published_text,
        'like_count': like_count,
        'is_favorited': hearted,
        'author': author_name,
        'author_id': channel_id,
        'author_thumbnail': avatar_url,
        'author_is_uploader': by_uploader,
        'parent': parent or 'root'
    }

2808

2809

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator yielding comment dicts for a comment section or a reply thread.

    Pages are requested from the 'next' endpoint by following continuations,
    starting from the one found in `root_continuation_data`. For every comment
    that has a replies renderer, this method calls itself with `parent` set to
    that comment's id and the same `tracker`.

    @param root_continuation_data  Renderer data holding the initial
                                   continuation (and possibly a message)
    @param ytcfg                   ytcfg passed on to the API requests
    @param video_id                ID of the video
    @param parent                  ID of the parent comment when extracting
                                   replies; None for the top-level section
    @param tracker                 Dict of counters shared across the
                                   recursive calls; created when not given
    """

    # Value of an extractor argument, or '' when it was not given
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Record the expected comment count and return the continuation of the requested sort order"""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments of one page, each followed by its replies.

        A bare `yield` (None) is emitted once the parent-comment limit has
        been reached; the page loop below stops when it sees a falsy entry.
        """
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            # max_parents is assigned further down in the enclosing function,
            # before this generator is first iterated
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # The comment renderer is looked up in both the thread renderer and the item itself
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap the replies by both the per-thread limit and what is
                # left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # The max_comments argument is padded with '' so that four values can
    # always be unpacked; values that are not integers become sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        # `response` is None on the first page and the previous page's response afterwards
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        # Only the first request of a generated continuation skips the key check
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section is only used for its header (count and sort continuation)
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                # A falsy entry is the stop signal from extract_thread
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    # Surface any message found in the root data if no comment was extracted at top level
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

2954

2955

@staticmethod

2956

def _generate_comment_continuation(video_id):

2957

"""

2958

Generates initial comment section continuation token from given video id

2959

"""

2960

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

2961

return base64.b64encode(token.encode()).decode()

2962

2963

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry point for comment extraction

    Locates the item section identified as 'comment-item-section' in
    `contents` and lazily yields its comments via `_comment_entries`,
    truncated to the first value of the `max_comments` extractor argument
    (no truncation when that value is not an integer).

    `webpage` is accepted but not used here.
    """
    def iter_all_comments():
        comment_section = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_all_comments(), 0, limit)

2973

2974

@staticmethod

2975

def _get_checkok_params():

2976

return {'contentCheckOk': True, 'racyCheckOk': True}

2977

2978

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player request.

    @param sts  Signature timestamp; included as `signatureTimestamp`
                only when it is not None

    Returns a dict with `playbackContext` plus the check-ok parameters
    from `_get_checkok_params`.
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback,
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context

@staticmethod
def _is_agegated(player_response):
    """Tell whether a player response indicates an age-gated video.

    True when `playabilityStatus` carries a `desktopLegacyAgeGateReason`,
    or when its `status` or `reason` contains one of the known age-gate
    markers. Returns a bool.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lowercase while status values are upper-case names
    # (e.g. AGE_VERIFICATION_REQUIRED), so the comparison has to be
    # case-insensitive for the status markers to match at all.
    # Non-string values are skipped rather than searched.
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)

3003

3004

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of a player response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3007

3008

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):

3009

3010

session_index = self._extract_session_index(player_ytcfg, master_ytcfg)

3011

syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

3012

sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None

3013

headers = self.generate_api_headers(

3014

ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

yt_query = {

'videoId': video_id,

'params': '8AEB' # enable stories

3019

}

3020

yt_query.update(self._generate_player_context(sts))

3021

return self._extract_response(

3022

item_id=video_id, ep='player', query=yt_query,

3023

ytcfg=player_ytcfg, headers=headers, fatal=True,

3024

default_client=client,

3025

note='Downloading %s player API JSON' % client.replace('_', ' ').strip()

3026

) or None

3027

3028

def _get_requested_clients(self, url, smuggled_data):
    """Work out which innertube clients to request player responses from.

    Reads the `player_client` extractor argument. Besides the names of
    clients in INNERTUBE_CLIENTS that do not start with '_', the values
    'default' and 'all' are accepted; anything else is skipped with a
    warning. When nothing usable was requested, the default clients are used.
    For music URLs, the `<client>_music` variant of every selected client
    is added when such a client exists.

    Returns the selected client names via `orderedSet`, which is expected
    to drop duplicates while keeping first occurrences.
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that the list of defaults is never modified below
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the variants first: extending the list from a generator
        # that iterates the same list would mutate it during iteration
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3051

3052

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for the given clients.

    The clients are tried in the given order. Depending on a response's
    playability/age-gate state, further fallback clients may be queued and
    are tried immediately after the client that triggered them.

    @param clients       List of client names to try
    @param video_id      ID of the video
    @param webpage       Watch page contents, if downloaded; the initial
                         player response is read from it
    @param master_ytcfg  ytcfg used for the 'web' client and as a fallback

    Returns a tuple (player_responses, player_url).
    Raises the last ExtractorError if no player response could be obtained;
    errors are otherwise reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    all_clients = set(clients)
    # Reversed so that clients.pop() takes the clients in their original order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """Queue the first of the given clients that exists and has not been queued before"""
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    # Appended to the end, so it is the next one popped
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        # Shallow copy: initial_pr itself keeps its streamingData
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # Once found, the player URL is reused for the remaining clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback download of the player URL is attempted at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # The initial player response stands in for the 'web' client's response when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones are reported as warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        # Fatal only if nothing at all was collected
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3127

3128

def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Generator yielding format dicts.

    Formats listed directly under `formats`/`adaptiveFormats` are yielded
    first, followed by the formats from any HLS and DASH manifests that are
    not skipped.

    @param streaming_data  Iterable of streamingData dicts
    @param video_id        ID of the video
    @param player_url      Player URL used to decrypt signatures and n-values
    @param is_live         Whether the video is live
    @param duration        Duration of the video, used to detect damaged
                           formats (presumably in seconds; confirm with caller)
    """
    # itags: itag -> protocol it was first seen with; stream_ids: itag/audio-track pairs already yielded
    itags, stream_ids = {}, []
    # Quality names remembered per itag and per height, for use with manifest formats
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        # Prefer the audio quality name when the quality is 'tiny' or missing
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: the URL and encrypted signature come from signatureCipher
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                # The decrypted signature is appended under the name given by 'sp', or 'signature'
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                # The format is kept with its original n-value, but marked as throttled
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        # 10 for the default audio track, -10 for a descriptive one, -1 otherwise
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        # NOTE(review): `quality` is still None here when the format has neither
        # 'quality' nor 'audioQuality'; `quality.replace` below would then raise
        # if 'qualityLabel' is missing as well - confirm whether that can happen
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        # mimeType is split into the type proper and an optional codecs="..." list
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-media formats the total bitrate is that medium's bitrate
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    # DASH manifests of live videos are only used with live_from_start or include_live_dash
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Set the format id and quality of a manifest format; return False if it is a duplicate"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # The itag was already seen with another protocol: suffix the id with this one
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Quality is looked up by the itag part of the format id first, then by height; -1 if unknown
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                # The itag of an HLS format is taken from its URL
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    # The file size is taken from the /clen/ component of the URL, when present
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Generator yielding one 'mhtml' storyboard format per storyboard level.

    The storyboard spec is a '|'-separated string: a base URL followed by
    one '#'-separated entry per level. Entries that do not have exactly 8
    fields, or whose first five fields are not non-zero integers, are
    skipped with a warning.

    @param player_responses  Player responses to read the spec from
    @param duration          Duration of the video; every fragment's
                             duration is derived from it

    Nothing is yielded when there is no usable base URL or when `duration`
    is unknown, since fragment durations cannot be computed without it.
    """
    # Reversed so that the base URL can be popped off the end
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # The caller may pass None; dividing it below would raise TypeError
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # `spec` was reversed above, so index i corresponds to level L - i
        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is a grid of cols x rows frames; the last may be partial
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment only covers what is left of the duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3338

3339

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):

3340

webpage = None

3341

if 'webpage' not in self._configuration_arg('player_skip'):

3342

webpage = self._download_webpage(

3343

webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

3344

3345

master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

3346

3347

player_responses, player_url = self._extract_player_responses(

3348

self._get_requested_clients(url, smuggled_data),

3349

video_id, webpage, master_ytcfg)

3350

3351

return webpage, master_ytcfg, player_responses, player_url

3352

3353

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live state and extract all formats of the video.

    `is_live` is read from `isLive` in the video details, falling back to
    `isLiveNow` in the live broadcast details when that is None.

    Returns a tuple (live_broadcast_details, is_live, streaming_data, formats).
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The format generator is consumed in full here
    formats = []
    formats.extend(self._extract_formats(streaming_data, video_id, player_url, is_live, duration))

    return live_broadcast_details, is_live, streaming_data, formats

3363

3364

def _real_extract(self, url):

3365

url, smuggled_data = unsmuggle_url(url, {})

3366

video_id = self._match_id(url)

3367

3368

base_url = self.http_scheme() + '//www.youtube.com/'

3369

webpage_url = base_url + 'watch?v=' + video_id

3370

3371

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3372

3373

playability_statuses = traverse_obj(

3374

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3375

3376

trailer_video_id = get_first(

3377

playability_statuses,

3378

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3379

expected_type=str)

3380

if trailer_video_id:

3381

return self.url_result(

3382

trailer_video_id, self.ie_key(), trailer_video_id)

3383

3384

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3385

if webpage else (lambda x: None))

3386

3387

video_details = traverse_obj(

3388

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3389

microformats = traverse_obj(

3390

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3391

expected_type=dict, default=[])

3392

video_title = (

3393

get_first(video_details, 'title')

3394

or self._get_text(microformats, (..., 'title'))

3395

or search_meta(['og:title', 'twitter:title', 'title']))

3396

video_description = get_first(video_details, 'shortDescription')

3397

3398

multifeed_metadata_list = get_first(

3399

player_responses,

3400

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3401

expected_type=str)

3402

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3403

if self.get_param('noplaylist'):

3404

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3409

# Unquote should take place before split on comma (,) since textual

3410

# fields may contain comma as well (see

3411

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3412

feed_data = compat_parse_qs(

3413

compat_urllib_parse_unquote_plus(feed))

3414

3415

def feed_entry(name):

3416

return try_get(

3417

feed_data, lambda x: x[name][0], compat_str)

3418

3419

feed_id = feed_entry('id')

3420

if not feed_id:

3421

continue

3422

feed_title = feed_entry('title')

3423

title = video_title

3424

if feed_title:

3425

title += ' (%s)' % feed_title

3426

entries.append({

3427

'_type': 'url_transparent',

3428

'ie_key': 'Youtube',

3429

'url': smuggle_url(

3430

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3431

{'force_singlefeed': True}),

3432

'title': title,

3433

})

3434

feed_ids.append(feed_id)

3435

self.to_screen(

3436

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3437

% (', '.join(feed_ids), video_id))

3438

return self.playlist_result(

3439

entries, video_id, video_title, video_description)

3440

3441

duration = int_or_none(

3442

get_first(video_details, 'lengthSeconds')

3443

or get_first(microformats, 'lengthSeconds')

3444

or parse_duration(search_meta('duration'))) or None

3445

3446

if get_first(video_details, 'isPostLiveDvr'):

3447

self.write_debug('Video is in Post-Live Manifestless mode')

3448

if duration or 0 > 4 * 3600:

3449

self.report_warning(

3450

'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '

3451

'This is a known issue and patches are welcome')

3452

3453

live_broadcast_details, is_live, streaming_data, formats = self._list_formats(

3454

video_id, microformats, video_details, player_responses, player_url, duration)

3455

3456

if not formats:

3457

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3458

self.report_drm(video_id)

3459

pemr = get_first(

3460

playability_statuses,

3461

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3462

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3463

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3464

if subreason:

3465

if subreason == 'The uploader has not made this video available in your country.':

3466

countries = get_first(microformats, 'availableCountries')

3467

if not countries:

3468

regions_allowed = search_meta('regionsAllowed')

3469

countries = regions_allowed.split(',') if regions_allowed else None

3470

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3471

reason += f'. {subreason}'

3472

if reason:

3473

self.raise_no_formats(reason, expected=True)

3474

3475

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3476

if not keywords and webpage:

3477

keywords = [

3478

unescapeHTML(m.group('content'))

3479

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3480

for keyword in keywords:

3481

if keyword.startswith('yt:stretch='):

3482

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3483

if mobj:

3484

# NB: float is intentional for forcing float division

3485

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3490

f['stretched_ratio'] = ratio

3491

break

3492

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3493

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3494

if thumbnail_url:

3495

thumbnails.append({

3496

'url': thumbnail_url,

3497

})

3498

original_thumbnails = thumbnails.copy()

3499

3500

# The best resolution thumbnails sometimes does not appear in the webpage

3501

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3502

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3503

thumbnail_names = [

3504

# While the *1,*2,*3 thumbnails are just below their correspnding "*default" variants

3505

# in resolution, these are not the custom thumbnail. So de-prioritize them

3506

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3507

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3508

]

3509

n_thumbnail_names = len(thumbnail_names)

3510

thumbnails.extend({

3511

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3512

video_id=video_id, name=name, ext=ext,

3513

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3514

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3515

for thumb in thumbnails:

3516

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3517

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3518

self._remove_duplicate_formats(thumbnails)

3519

self._downloader._sort_thumbnails(original_thumbnails)

3520

3521

category = get_first(microformats, 'category') or search_meta('genre')

3522

channel_id = str_or_none(

3523

get_first(video_details, 'channelId')

3524

or get_first(microformats, 'externalChannelId')

3525

or search_meta('channelId'))

3526

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3527

3528

live_content = get_first(video_details, 'isLiveContent')

3529

is_upcoming = get_first(video_details, 'isUpcoming')

3530

if is_live is None:

3531

if is_upcoming or live_content is False:

3532

is_live = False

3533

if is_upcoming is None and (live_content or is_live):

3534

is_upcoming = False

3535

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3536

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3537

if not duration and live_end_time and live_start_time:

3538

duration = live_end_time - live_start_time

3539

3540

if is_live and self.get_param('live_from_start'):

3541

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3542

3543

formats.extend(self._extract_storyboard(player_responses, duration))

3544

3545

# Source is given priority since formats that throttle are given lower source_preference

3546

# When throttling issue is fully fixed, remove this

3547

self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3552

'formats': formats,

3553

'thumbnails': thumbnails,

3554

# The best thumbnail that we are sure exists. Prevents unnecessary

3555

# URL checking if user don't care about getting the best possible thumbnail

3556

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3557

'description': video_description,

3558

'uploader': get_first(video_details, 'author'),

3559

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3560

'uploader_url': owner_profile_url,

3561

'channel_id': channel_id,

3562

'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),

3563

'duration': duration,

3564

'view_count': int_or_none(

3565

get_first((video_details, microformats), (..., 'viewCount'))

3566

or search_meta('interactionCount')),

3567

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3568

'age_limit': 18 if (

3569

get_first(microformats, 'isFamilySafe') is False

3570

or search_meta('isFamilyFriendly') == 'false'

3571

or search_meta('og:restrictions:age') == '18+') else 0,

3572

'webpage_url': webpage_url,

3573

'categories': [category] if category else None,

3574

'tags': keywords,

3575

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3576

'is_live': is_live,

3577

'was_live': (False if is_live or is_upcoming or live_content is False

3578

else None if is_live is None or is_upcoming is None

3579

else live_content),

3580

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3581

'release_timestamp': live_start_time,

3582

}

3583

3584

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3585

if pctr:

3586

def get_lang_code(track):

3587

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3588

or track.get('languageCode'))

3589

3590

# Converted into dicts to remove duplicates

3591

captions = {

3592

get_lang_code(sub): sub

3593

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3594

translation_languages = {

3595

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3596

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3597

3598

def process_language(container, base_url, lang_code, sub_name, query):

3599

lang_subs = container.setdefault(lang_code, [])

3600

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

subtitles, automatic_captions = {}, {}

3611

for lang_code, caption_track in captions.items():

3612

base_url = caption_track.get('baseUrl')

3613

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3614

if not base_url:

3615

continue

3616

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3617

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3622

if not caption_track.get('isTranslatable'):

3623

continue

3624

for trans_code, trans_name in translation_languages.items():

3625

if not trans_code:

3626

continue

3627

orig_trans_code = trans_code

3628

if caption_track.get('kind') != 'asr':

3629

if 'translated_subs' in self._configuration_arg('skip'):

3630

continue

3631

trans_code += f'-{lang_code}'

3632

trans_name += format_field(lang_name, template=' from %s')

3633

# Add an "-orig" label to the original language so that it can be distinguished.

3634

# The subs are returned without "-orig" as well for compatibility

3635

if lang_code == f'a-{orig_trans_code}':

3636

process_language(

3637

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3638

# Setting tlang=lang returns damaged subtitles.

3639

process_language(automatic_captions, base_url, trans_code, trans_name,

3640

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3641

info['automatic_captions'] = automatic_captions

3642

info['subtitles'] = subtitles

3643

3644

parsed_url = compat_urllib_parse_urlparse(url)

3645

for component in [parsed_url.fragment, parsed_url.query]:

3646

query = compat_parse_qs(component)

3647

for k, v in query.items():

3648

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3649

d_k += '_time'

3650

if d_k not in info and k in s_ks:

3651

info[d_k] = parse_duration(query[k][0])

3652

3653

# Youtube Music Auto-generated description

3654

if video_description:

3655

mobj = re.search(

3656

r'''(?xs)

3657

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

3658

(?P<album>[^\n]+)

3659

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

3660

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

3661

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

3662

.+\nAuto-generated\ by\ YouTube\.\s*$

3663

''', video_description)

3664

if mobj:

3665

release_year = mobj.group('release_year')

3666

release_date = mobj.group('release_date')

3667

if release_date:

3668

release_date = release_date.replace('-', '')

3669

if not release_year:

3670

release_year = release_date[:4]

3671

info.update({

3672

'album': mobj.group('album'.strip()),

3673

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3674

'track': mobj.group('track').strip(),

3675

'release_date': release_date,

3676

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self._extract_yt_initial_variable(

3682

webpage, self._YT_INITIAL_DATA_RE, video_id,

3683

'yt initial data')

3684

if not initial_data:

3685

query = {'videoId': video_id}

3686

query.update(self._get_checkok_params())

3687

initial_data = self._extract_response(

3688

item_id=video_id, ep='next', fatal=False,

3689

ytcfg=master_ytcfg, query=query,

3690

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3691

note='Downloading initial data API JSON')

3692

3693

try: # This will error if there is no livechat

3694

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3695

except (KeyError, IndexError, TypeError):

3696

pass

3697

else:

3698

info.setdefault('subtitles', {})['live_chat'] = [{

3699

'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies

3700

'video_id': video_id,

3701

'ext': 'json',

3702

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3708

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3709

or self._extract_chapters_from_description(video_description, duration)

3710

or None)

3711

3712

contents = traverse_obj(

3713

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3714

expected_type=list, default=[])

3715

3716

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3717

if vpir:

3718

stl = vpir.get('superTitleLink')

3719

if stl:

3720

stl = self._get_text(stl)

3721

if try_get(

3722

vpir,

3723

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3724

info['location'] = stl

3725

else:

3726

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

3727

if mobj:

3728

info.update({

3729

'series': mobj.group(1),

3730

'season_number': int(mobj.group(2)),

3731

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3736

list) or []):

3737

tbr = tlb.get('toggleButtonRenderer') or {}

3738

for getter, regex in [(

3739

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3740

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3741

lambda x: x['accessibility'],

3742

lambda x: x['accessibilityData']['accessibilityData'],

3743

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3744

label = (try_get(tbr, getter, dict) or {}).get('label')

3745

if label:

3746

mobj = re.match(regex, label)

3747

if mobj:

3748

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3749

break

3750

sbr_tooltip = try_get(

3751

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3752

if sbr_tooltip:

3753

like_count, dislike_count = sbr_tooltip.split(' / ')

3754

info.update({

3755

'like_count': str_to_int(like_count),

3756

'dislike_count': str_to_int(dislike_count),

3757

})

3758

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3759

if vsir:

3760

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3761

info.update({

3762

'channel': self._get_text(vor, 'title'),

3763

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3768

list) or []

3769

multiple_songs = False

3770

for row in rows:

3771

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3772

multiple_songs = True

3773

break

3774

for row in rows:

3775

mrr = row.get('metadataRowRenderer') or {}

3776

mrr_title = mrr.get('title')

3777

if not mrr_title:

3778

continue

3779

mrr_title = self._get_text(mrr, 'title')

3780

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3781

if mrr_title == 'License':

3782

info['license'] = mrr_contents_text

3783

elif not multiple_songs:

3784

if mrr_title == 'Album':

3785

info['album'] = mrr_contents_text

3786

elif mrr_title == 'Artist':

3787

info['artist'] = mrr_contents_text

3788

elif mrr_title == 'Song':

3789

info['track'] = mrr_contents_text

3790

3791

fallbacks = {

3792

'channel': 'uploader',

3793

'channel_id': 'uploader_id',

3794

'channel_url': 'uploader_url',

3795

}

3796

3797

# The upload date for scheduled, live and past live streams / premieres in microformats

3798

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3799

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3800

upload_date = (

3801

unified_strdate(get_first(microformats, 'uploadDate'))

3802

or unified_strdate(search_meta('uploadDate')))

3803

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3804

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date

3805

info['upload_date'] = upload_date

3806

3807

for to, frm in fallbacks.items():

3808

if not info.get(to):

3809

info[to] = info.get(frm)

3810

3811

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3817

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3818

is_membersonly = None

3819

is_premium = None

3820

if initial_data and is_private is not None:

3821

is_membersonly = False

3822

is_premium = False

3823

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3824

badge_labels = set()

3825

for content in contents:

3826

if not isinstance(content, dict):

3827

continue

3828

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3829

for badge_label in badge_labels:

3830

if badge_label.lower() == 'members only':

3831

is_membersonly = True

3832

elif badge_label.lower() == 'premium':

3833

is_premium = True

3834

elif badge_label.lower() == 'unlisted':

3835

is_unlisted = True

3836

3837

info['availability'] = self._availability(

3838

is_private=is_private,

3839

needs_premium=is_premium,

3840

needs_subscription=is_membersonly,

3841

needs_auth=info['age_limit'] >= 18,

3842

is_unlisted=None if is_private is None else is_unlisted)

3843

3844

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3845

3846

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3852

3853

@staticmethod

3854

def passthrough_smuggled_data(func):
    """Wrap ``func(self, url, smuggled_data)`` so it is callable as ``wrapper(self, url)``.

    The wrapper unsmuggles whatever data ``url`` carries, marks music URLs
    in that data, passes it on to ``func`` and finally smuggles the same
    data back into the URL of every entry of the returned info dict.
    """
    def _with_smuggled_urls(entries, payload):
        # Lazily re-attach the smuggled payload to each entry URL.
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, payload = unsmuggle_url(url, {})
        if self.is_music_url(url):
            payload['is_music_url'] = True
        result = func(self, url, payload)
        # Only touch the entries when there is something to pass through
        if payload and result.get('entries'):
            result['entries'] = _with_smuggled_urls(result['entries'], payload)
        return result

    return wrapper

def _extract_channel_id(self, webpage):
    """Return the channel ID advertised in the meta tags of ``webpage``.

    The ``channelId`` meta tag is preferred. When it is missing, the ID is
    taken from the ``/channel/<id>`` part of the first available URL-style
    meta tag (``og:url``, ``twitter:url``, ...).
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must sit inside the group: a repeated capturing group
    # only keeps its last repetition, i.e. the final character of the ID.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')

3885

3886

@staticmethod

3887

def _extract_basic_item_renderer(item):

3888

# Modified from _extract_grid_item_renderer

3889

known_basic_renderers = (

3890

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

3891

)

3892

for key, renderer in item.items():

3893

if not isinstance(renderer, dict):

3894

continue

3895

elif key in known_basic_renderers:

3896

return renderer

3897

elif key.startswith('grid') and key.endswith('Renderer'):

3898

return renderer

3899

3900

def _grid_entries(self, grid_renderer):
    """Yield a result for every usable item in ``grid_renderer['items']``.

    Each item is resolved, in this order of precedence, as a playlist, a
    video, a channel, or a generic navigation endpoint URL that one of the
    YouTube extractors can handle. Items matching none of these are skipped.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            yield self._extract_video(renderer)
        elif channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not ep_url:
                continue
            # Hand the URL to the first extractor that claims it
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url),
                    video_title=title)

3939

3940

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for a music list item.

    Tried in order: the item's own video ID, the playlist (optionally with
    a video) of its watch endpoint, then its browse endpoint ID.
    Returns None when the renderer carries none of these.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)

3957

3958

def _shelf_entries_from_content(self, shelf_renderer):

3959

content = shelf_renderer.get('content')

3960

if not isinstance(content, dict):

3961

return

3962

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3963

if renderer:

3964

# TODO: add support for nested playlists so each shelf is processed

3965

# as separate playlist

3966

# TODO: this includes only first N items

3967

yield from self._grid_entries(renderer)

3968

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's own page, then its embedded entries.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3988

3989

def _playlist_entries(self, video_list_renderer):

3990

for content in video_list_renderer['contents']:

3991

if not isinstance(content, dict):

3992

continue

3993

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

3994

if not isinstance(renderer, dict):

3995

continue

3996

video_id = renderer.get('videoId')

3997

if not video_id:

3998

continue

3999

yield self._extract_video(renderer)

4000

4001

def _rich_entries(self, rich_grid_renderer):
    """Yield the video found at ``content.videoRenderer``, if it has a ``videoId``."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4008

4009

def _video_entry(self, video_renderer):

4010

video_id = video_renderer.get('videoId')

4011

if video_id:

4012

return self._extract_video(video_renderer)

4013

4014

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for a hashtag tile, or None when it has no usable URL."""
    relative_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', relative_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4020

4021

def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, then every video
    URL linked inline in the post text (except the attached video itself).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video was already yielded above
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)

4056

4057

def _post_thread_continuation_entries(self, post_thread_continuation):

4058

contents = post_thread_continuation.get('contents')

4059

if not isinstance(contents, list):

4060

return

4061

for content in contents:

4062

renderer = content.get('backstagePostThreadRenderer')

4063

if isinstance(renderer, dict):

4064

yield from self._post_thread_entries(renderer)

4065

continue

4066

renderer = content.get('videoRenderer')

4067

if isinstance(renderer, dict):

4068

yield self._video_entry(renderer)

4069

4070

r''' # unused

4071

def _rich_grid_entries(self, contents):

4072

for content in contents:

4073

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4074

if video_renderer:

4075

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries nested below ``parent_renderer['contents']``.

    ``continuation_list`` is an output parameter: it is reset to ``[None]``
    and its single slot receives the continuation found while extracting,
    looked up on the innermost renderer first and on ``parent_renderer`` last.
    """
    # Renderer key -> callable returning an iterable of entries
    entry_extractors = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            # Not a section: the only other supported shape is a rich item
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first recognised renderer of each item is processed
            for key, renderer in section_item.items():
                extractor = entry_extractors.get(key)
                if extractor is None:
                    continue
                for entry in extractor(renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4128

4129

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of ``tab``, requesting continuation pages as needed.

    The entries embedded in the tab are yielded first. After that, pages
    are requested for as long as a continuation is available and the
    response has a shape this method knows how to read.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Looks up ``continuation_list`` at call time, so it always fills the
        # list most recently bound to that name in the loop below
        return self._extract_entries(renderer, continuation_list)

    # Handlers for responses of the form {'continuationContents': {key: renderer}}
    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for appended continuation items, as (extractor, key the extractor
    # expects the items under), selected by the key of the first item
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # First supported response shape: continuationContents
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            handler = known_continuation_renderers.get(key)
            if handler is None:
                continue
            continuation_renderer = value
            continuation_list = [None]
            yield from handler(continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second supported response shape: appended continuation items
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in first_item.items():
            if key not in known_renderers:
                continue
            handler, items_key = known_renderers[key]
            video_items_renderer = {items_key: continuation_items}
            continuation_list = [None]
            yield from handler(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if not video_items_renderer:
            # Unrecognised response: stop paging
            break

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4206

for tab in tabs:

4207

renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}

4208

if renderer.get('selected') is True:

return renderer

else:

if fatal:

raise ExtractorError('Unable to find selected tab')

4213

4214

def _extract_uploader(self, data):
    """Return uploader fields read from the playlist's secondary sidebar.

    The result may hold ``uploader``, ``uploader_id`` and ``uploader_url``;
    fields without a value are left out. An empty dict is returned when the
    sidebar has no owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # Strip the "by ... and N others" wrapper if present; otherwise keep the text as is
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {name: value for name, value in fields.items() if value is not None}

4229

4230

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a channel/playlist page.

    Metadata comes from ``channelMetadataRenderer`` when present, otherwise
    from ``playlistMetadataRenderer``, plus the primary sidebar and header
    of ``data``. The entries are produced lazily by ``_entries``.
    """
    def _add_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped:
            thumbnails.append({
                'url': uncropped,
                'id': thumbnail_id,
                'preference': preference
            })

    playlist_id = title = description = None
    channel_url = channel_name = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _add_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _add_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Append the name of the selected tab to the title, when it has one
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly just updated) uploader fields
    for suffix in ('', '_id', '_url'):
        metadata['channel' + suffix] = metadata['uploader' + suffix]

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4321

4322

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):

4323

first_id = last_id = response = None

4324

for page_num in itertools.count(1):

4325

videos = list(self._playlist_entries(playlist))

4326

if not videos:

4327

return

4328

start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1

4329

if start >= len(videos):

4330

return

4331

yield from videos[start:]

4332

first_id = first_id or videos[0]['id']

4333

last_id = videos[-1]['id']

4334

watch_endpoint = try_get(

4335

playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])

4336

headers = self.generate_api_headers(

4337

ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),

4338

visitor_data=self._extract_visitor_data(response, data, ytcfg))

4339

query = {

4340

'playlistId': playlist_id,

4341

'videoId': watch_endpoint.get('videoId') or last_id,

4342

'index': watch_endpoint.get('index') or len(videos),

4343

'params': watch_endpoint.get('params') or 'OAE%3D'

4344

}

4345

response = self._extract_response(

4346

item_id='%s page %d' % (playlist_id, page_num),

4347

query=query, ep='next', headers=headers, ytcfg=ytcfg,

4348

check_get_keys='contents'

4349

)

4350

playlist = try_get(

4351

response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

4352

4353

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the extraction result for a playlist renderer.

    Returns a `url_result` for the playlist's own URL (to be handled by
    YoutubeTabIE) when the renderer provides one that differs from `url` and
    the playlist ID is not a known-unviewable one. Otherwise returns a
    `playlist_result` whose entries come from `_extract_inline_playlist`.

    @param item_id: fallback playlist ID when the renderer has no `playlistId`
    @param url: URL being extracted; base for resolving the playlist URL
    @param data: response; fallback source for the title
    @param playlist: playlist renderer (dict)
    @param ytcfg: passed through to `_extract_inline_playlist`
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], compat_str)
    playlist_url = urljoin(url, endpoint_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not should_delegate:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)

4375

4376

def _extract_availability(self, data):
    """
    Work out the availability of a playlist/tab from its primary sidebar renderer.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns the result of `_availability` for the detected private/unlisted flags
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    # Only the first selected dropdown entry is considered
    selected_entry = next(
        (entry for entry in dropdown_entries
         if try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool)),
        None)
    if selected_entry is not None:
        label = self._get_text(selected_entry, ('privacyDropdownItemRenderer', 'label'))
        if label:
            badge_labels.add(label.lower())

    # None means "unknown": flags are only set when a matching label is present
    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'public':
            is_unlisted = is_private = False
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)

4407

4408

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Find a renderer among the playlist sidebar items.

    @param data: response containing `sidebar.playlistSidebarRenderer.items`
    @param info_renderer: key of the renderer to look up in each sidebar item
    @param expected_type: type the renderer must have to be accepted
    @returns the first truthy matching renderer, or None if there is none
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next((found for found in candidates if found), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again so that unavailable videos are included.

    The browse endpoint of the 'show unavailable videos' menu item is used
    when that item is present; otherwise default `params`/`browseId` values
    are sent. Nothing is requested (None is returned) when the response has
    no primary sidebar info renderer. The request is non-fatal.
    """
    primary_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not primary_info:
        return

    browse_endpoint = {}
    menu_items = try_get(
        primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_item = entry.get('menuNavigationItemRenderer')
        label = try_get(nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            # Only the first matching menu item is used
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4452

4453

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped

4456

4457

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying when needed.

    An attempt is retried when the download fails with a network error
    (other than HTTP 403/429) or when the extracted data has none of the
    keys 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'.
    The number of retries is taken from the 'extractor_retries' param.

    @param url: URL to download
    @param item_id: ID used when reporting the download
    @param fatal: raise on failure if True, otherwise only report a warning
    @returns tuple (webpage, data) with whatever was obtained last
    """
    max_retries = self.get_param('extractor_retries', 3)
    webpage = data = last_error = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            webpage = self._download_webpage(
                url, item_id, note='Downloading webpage%s' % (retry_suffix,))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Network errors are retried, except HTTP 403 and 429
            is_retryable = isinstance(cause, network_exceptions) and not (
                isinstance(cause, compat_HTTPError) and cause.code in (403, 429))
            if is_retryable:
                last_error = error_to_compat_str(cause or e.msg)
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Reached only when the download and data extraction succeeded
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt >= max_retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when `ytcfg` is available or the session is not authenticated.
    Otherwise raises an expected ExtractorError if `fatal` is set and
    'authcheck' is not in the 'skip' extractor argument; else reports a warning.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped_checks = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if 'authcheck' not in skipped_checks and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

4514

4515

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the response data for a tab/playlist URL.

    Unless the webpage is skipped, the data is first taken from the webpage
    (which also supplies `ytcfg` when none was given). If no data was
    obtained this way, it is requested through `_extract_tab_endpoint`.

    @param fatal: whether a home-page redirect, the auth check and the API request are fatal
    @param webpage_fatal: whether failing to extract the webpage is fatal
    @returns tuple (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    # No usable webpage data: fall back to the API
    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4534

4535

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the API and fetch the response of the endpoint it maps to.

    The URL is first resolved with 'navigation/resolve_url'. A resolved
    'browseEndpoint' is requested from 'browse', a 'watchEndpoint' from 'next'
    (checked in that order).

    @returns the endpoint response; None (after a warning) if the URL could
             not be resolved and `fatal` is False
    @raises ExtractorError if the URL could not be resolved and `fatal` is True
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

4552

4553

# Default 'params' value for search requests; used by _search_results when no params are passed
_SEARCH_PARAMS = None

4554

4555

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, following continuations page by page.

    @param query: search query
    @param params: 'params' value of the search request; defaults to `_SEARCH_PARAMS`
    @param default_client: client used for ytcfg, headers and requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Paths at which the results may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the paths above
    check_get_keys = tuple({keys[0] for keys in content_keys})
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-item list that `_extract_entries` receives to report the next continuation
    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        data.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4590

IE_DESC = 'YouTube Tabs'

4591

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4600

(?P<not_channel>

4601

feed/|hashtag/|

4602

(?:playlist|watch)\?.*?\blist=

4603

)|

4604

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4609

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4610

}

4611

IE_NAME = 'youtube:tab'

4612

4613

_TESTS = [{

4614

'note': 'playlists, multipage',

4615

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4616

'playlist_mincount': 94,

4617

'info_dict': {

4618

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4619

'title': 'Igor Kleiner - Playlists',

4620

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4621

'uploader': 'Igor Kleiner',

4622

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4623

'channel': 'Igor Kleiner',

4624

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4625

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4626

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4627

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4628

'channel_follower_count': int

4629

},

4630

}, {

4631

'note': 'playlists, multipage, different order',

4632

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4633

'playlist_mincount': 94,

4634

'info_dict': {

4635

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4636

'title': 'Igor Kleiner - Playlists',

4637

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4638

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4639

'uploader': 'Igor Kleiner',

4640

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4641

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4642

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4643

'channel': 'Igor Kleiner',

4644

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4645

'channel_follower_count': int

4646

},

4647

}, {

4648

'note': 'playlists, series',

4649

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4650

'playlist_mincount': 5,

4651

'info_dict': {

4652

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4653

'title': '3Blue1Brown - Playlists',

4654

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4655

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4656

'uploader': '3Blue1Brown',

4657

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4658

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4659

'channel': '3Blue1Brown',

4660

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4661

'tags': ['Mathematics'],

4662

'channel_follower_count': int

4663

},

4664

}, {

4665

'note': 'playlists, singlepage',

4666

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4667

'playlist_mincount': 4,

4668

'info_dict': {

4669

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4670

'title': 'ThirstForScience - Playlists',

4671

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4672

'uploader': 'ThirstForScience',

4673

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4674

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4675

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4676

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4677

'tags': 'count:13',

4678

'channel': 'ThirstForScience',

4679

'channel_follower_count': int

4680

}

4681

}, {

4682

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4683

'only_matching': True,

4684

}, {

4685

'note': 'basic, single video playlist',

4686

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4687

'info_dict': {

4688

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4689

'uploader': 'Sergey M.',

4690

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4691

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4696

'channel': 'Sergey M.',

4697

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4698

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4699

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4704

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4705

'info_dict': {

4706

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4707

'uploader': 'Sergey M.',

4708

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4709

'title': 'youtube-dl empty playlist',

4710

'tags': [],

4711

'channel': 'Sergey M.',

4712

'description': '',

4713

'modified_date': '20160902',

4714

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4715

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4716

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4722

'info_dict': {

4723

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4724

'title': 'lex will - Home',

4725

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4726

'uploader': 'lex will',

4727

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4728

'channel': 'lex will',

4729

'tags': ['bible', 'history', 'prophesy'],

4730

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4731

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4732

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4733

'channel_follower_count': int

4734

},

4735

'playlist_mincount': 2,

4736

}, {

4737

'note': 'Videos tab',

4738

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4739

'info_dict': {

4740

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4741

'title': 'lex will - Videos',

4742

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4743

'uploader': 'lex will',

4744

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4745

'tags': ['bible', 'history', 'prophesy'],

4746

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4747

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4748

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4749

'channel': 'lex will',

4750

'channel_follower_count': int

4751

},

4752

'playlist_mincount': 975,

4753

}, {

4754

'note': 'Videos tab, sorted by popular',

4755

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4756

'info_dict': {

4757

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4758

'title': 'lex will - Videos',

4759

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4760

'uploader': 'lex will',

4761

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4762

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4763

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4764

'channel': 'lex will',

4765

'tags': ['bible', 'history', 'prophesy'],

4766

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4767

'channel_follower_count': int

4768

},

4769

'playlist_mincount': 199,

4770

}, {

4771

'note': 'Playlists tab',

4772

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4773

'info_dict': {

4774

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4775

'title': 'lex will - Playlists',

4776

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4777

'uploader': 'lex will',

4778

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4779

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4780

'channel': 'lex will',

4781

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4782

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4783

'tags': ['bible', 'history', 'prophesy'],

4784

'channel_follower_count': int

4785

},

4786

'playlist_mincount': 17,

4787

}, {

4788

'note': 'Community tab',

4789

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4790

'info_dict': {

4791

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4792

'title': 'lex will - Community',

4793

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4794

'uploader': 'lex will',

4795

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4796

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4797

'channel': 'lex will',

4798

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4799

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4800

'tags': ['bible', 'history', 'prophesy'],

4801

'channel_follower_count': int

4802

},

4803

'playlist_mincount': 18,

4804

}, {

4805

'note': 'Channels tab',

4806

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4807

'info_dict': {

4808

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4809

'title': 'lex will - Channels',

4810

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4811

'uploader': 'lex will',

4812

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4813

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4814

'channel': 'lex will',

4815

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4816

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4817

'tags': ['bible', 'history', 'prophesy'],

4818

'channel_follower_count': int

4819

},

4820

'playlist_mincount': 12,

4821

}, {

4822

'note': 'Search tab',

4823

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4824

'playlist_mincount': 40,

4825

'info_dict': {

4826

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4827

'title': '3Blue1Brown - Search - linear algebra',

4828

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4829

'uploader': '3Blue1Brown',

4830

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4831

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4832

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4833

'tags': ['Mathematics'],

4834

'channel': '3Blue1Brown',

4835

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4836

'channel_follower_count': int

4837

},

4838

}, {

4839

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4840

'only_matching': True,

4841

}, {

4842

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4843

'only_matching': True,

4844

}, {

4845

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4846

'only_matching': True,

4847

}, {

4848

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4849

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4850

'info_dict': {

4851

'title': '29C3: Not my department',

4852

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4853

'uploader': 'Christiaan008',

4854

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4855

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4856

'tags': [],

4857

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4858

'view_count': int,

4859

'modified_date': '20150605',

4860

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4861

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4862

'channel': 'Christiaan008',

4863

},

4864

'playlist_count': 96,

4865

}, {

4866

'note': 'Large playlist',

4867

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4868

'info_dict': {

4869

'title': 'Uploads from Cauchemar',

4870

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4871

'uploader': 'Cauchemar',

4872

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4873

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4874

'tags': [],

4875

'modified_date': r're:\d{8}',

4876

'channel': 'Cauchemar',

4877

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4878

'view_count': int,

4879

'description': '',

4880

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4881

},

4882

'playlist_mincount': 1123,

4883

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4884

}, {

4885

'note': 'even larger playlist, 8832 videos',

4886

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4887

'only_matching': True,

4888

}, {

4889

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4890

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4891

'info_dict': {

4892

'title': 'Uploads from Interstellar Movie',

4893

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4894

'uploader': 'Interstellar Movie',

4895

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4896

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4897

'tags': [],

4898

'view_count': int,

4899

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4900

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4901

'channel': 'Interstellar Movie',

4902

'description': '',

4903

'modified_date': r're:\d{8}',

4904

},

4905

'playlist_mincount': 21,

4906

}, {

4907

'note': 'Playlist with "show unavailable videos" button',

4908

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4909

'info_dict': {

4910

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4911

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4912

'uploader': 'Phim Siêu Nhân Nhật Bản',

4913

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4914

'view_count': int,

4915

'channel': 'Phim Siêu Nhân Nhật Bản',

4916

'tags': [],

4917

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4918

'description': '',

4919

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4920

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4921

'modified_date': r're:\d{8}',

4922

},

4923

'playlist_mincount': 200,

4924

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4925

}, {

4926

'note': 'Playlist with unavailable videos in page 7',

4927

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4928

'info_dict': {

4929

'title': 'Uploads from BlankTV',

4930

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4931

'uploader': 'BlankTV',

4932

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4933

'channel': 'BlankTV',

4934

'channel_url': 'https://www.youtube.com/c/blanktv',

4935

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4936

'view_count': int,

4937

'tags': [],

4938

'uploader_url': 'https://www.youtube.com/c/blanktv',

4939

'modified_date': r're:\d{8}',

4940

'description': '',

4941

},

4942

'playlist_mincount': 1000,

4943

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4944

}, {

4945

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4946

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4947

'info_dict': {

4948

'title': 'Data Analysis with Dr Mike Pound',

4949

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4950

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4951

'uploader': 'Computerphile',

4952

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4953

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4954

'tags': [],

4955

'view_count': int,

4956

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4957

'channel_url': 'https://www.youtube.com/user/Computerphile',

4958

'channel': 'Computerphile',

4959

},

4960

'playlist_mincount': 11,

4961

}, {

4962

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4963

'only_matching': True,

4964

}, {

4965

'note': 'Playlist URL that does not actually serve a playlist',

4966

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4971

'uploader': 'STREEM',

4972

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4973

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4974

'upload_date': '20150526',

4975

'license': 'Standard YouTube License',

4976

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4977

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4984

},

4985

'skip': 'This video is not available.',

4986

'add_ie': [YoutubeIE.ie_key()],

4987

}, {

4988

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4989

'only_matching': True,

4990

}, {

4991

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

4992

'only_matching': True,

4993

}, {

4994

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

4995

'info_dict': {

4996

'id': 'GgL890LIznQ', # This will keep changing

4997

'ext': 'mp4',

4998

'title': str,

4999

'uploader': 'Sky News',

5000

'uploader_id': 'skynews',

5001

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5002

'upload_date': r're:\d{8}',

5003

'description': str,

5004

'categories': ['News & Politics'],

5005

'tags': list,

5006

'like_count': int,

5007

'release_timestamp': 1642502819,

5008

'channel': 'Sky News',

5009

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5010

'age_limit': 0,

5011

'view_count': int,

5012

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

5013

'playable_in_embed': True,

5014

'release_date': '20220118',

5015

'availability': 'public',

5016

'live_status': 'is_live',

5017

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5018

'channel_follower_count': int

5019

},

5020

'params': {

5021

'skip_download': True,

5022

},

5023

'expected_warnings': ['Ignoring subtitle tracks found in '],

5024

}, {

5025

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5030

'uploader': 'The Young Turks',

5031

'uploader_id': 'TheYoungTurks',

5032

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5033

'upload_date': '20150715',

5034

'license': 'Standard YouTube License',

5035

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5036

'categories': ['News & Politics'],

5037

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5042

},

5043

'only_matching': True,

5044

}, {

5045

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5046

'only_matching': True,

5047

}, {

5048

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5049

'only_matching': True,

5050

}, {

5051

'note': 'A channel that is not live. Should raise error',

5052

'url': 'https://www.youtube.com/user/numberphile/live',

5053

'only_matching': True,

5054

}, {

5055

'url': 'https://www.youtube.com/feed/trending',

5056

'only_matching': True,

5057

}, {

5058

'url': 'https://www.youtube.com/feed/library',

5059

'only_matching': True,

5060

}, {

5061

'url': 'https://www.youtube.com/feed/history',

5062

'only_matching': True,

5063

}, {

5064

'url': 'https://www.youtube.com/feed/subscriptions',

5065

'only_matching': True,

5066

}, {

5067

'url': 'https://www.youtube.com/feed/watch_later',

5068

'only_matching': True,

5069

}, {

5070

'note': 'Recommended - redirects to home page.',

5071

'url': 'https://www.youtube.com/feed/recommended',

5072

'only_matching': True,

5073

}, {

5074

'note': 'inline playlist with not always working continuations',

5075

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5076

'only_matching': True,

5077

}, {

5078

'url': 'https://www.youtube.com/course',

5079

'only_matching': True,

5080

}, {

5081

'url': 'https://www.youtube.com/zsecurity',

5082

'only_matching': True,

5083

}, {

5084

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5085

'only_matching': True,

5086

}, {

5087

'url': 'https://www.youtube.com/TheYoungTurks/live',

5088

'only_matching': True,

5089

}, {

5090

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5097

}, {

5098

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5099

'only_matching': True,

5100

}, {

5101

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5102

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5103

'only_matching': True

5104

}, {

5105

'note': '/browse/ should redirect to /channel/',

5106

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5107

'only_matching': True

5108

}, {

5109

'note': 'VLPL, should redirect to playlist?list=PL...',

5110

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5111

'info_dict': {

5112

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5113

'uploader': 'NoCopyrightSounds',

5114

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5115

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5116

'title': 'NCS Releases',

5117

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5118

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5119

'modified_date': r're:\d{8}',

5120

'view_count': int,

5121

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5122

'tags': [],

5123

'channel': 'NoCopyrightSounds',

5124

},

5125

'playlist_mincount': 166,

5126

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5127

}, {

5128

'note': 'Topic, should redirect to playlist?list=UU...',

5129

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5130

'info_dict': {

5131

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5132

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5133

'title': 'Uploads from Royalty Free Music - Topic',

5134

'uploader': 'Royalty Free Music - Topic',

5135

'tags': [],

5136

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5137

'channel': 'Royalty Free Music - Topic',

5138

'view_count': int,

5139

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5140

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5141

'modified_date': r're:\d{8}',

5142

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5143

'description': '',

5144

},

5145

'expected_warnings': [

5146

'The URL does not have a videos tab',

5147

r'[Uu]navailable videos (are|will be) hidden',

5148

],

5149

'playlist_mincount': 101,

5150

}, {

5151

'note': 'Topic without a UU playlist',

5152

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5153

'info_dict': {

5154

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5155

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5156

'tags': [],

5157

},

5158

'expected_warnings': [

5159

'the playlist redirect gave error',

5160

],

5161

'playlist_mincount': 9,

5162

}, {

5163

'note': 'Youtube music Album',

5164

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5165

'info_dict': {

5166

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5167

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5172

'modified_date': r're:\d{8}',

5173

},

5174

'playlist_count': 50,

5175

}, {

5176

'note': 'unlisted single video playlist',

5177

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5178

'info_dict': {

5179

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5180

'uploader': 'colethedj',

5181

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5182

'title': 'yt-dlp unlisted playlist test',

5183

'availability': 'unlisted',

5184

'tags': [],

5185

'modified_date': '20211208',

5186

'channel': 'colethedj',

5187

'view_count': int,

5188

'description': '',

5189

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5190

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5191

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5196

'url': 'https://www.youtube.com/feed/recommended',

5197

'info_dict': {

5198

'id': 'recommended',

5199

'title': 'recommended',

5200

'tags': [],

5201

},

5202

'playlist_mincount': 50,

5203

'params': {

5204

'skip_download': True,

5205

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5206

},

5207

}, {

5208

'note': 'API Fallback: /videos tab, sorted by oldest first',

5209

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5210

'info_dict': {

5211

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5212

'title': 'Cody\'sLab - Videos',

5213

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5214

'uploader': 'Cody\'sLab',

5215

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5216

'channel': 'Cody\'sLab',

5217

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5218

'tags': [],

5219

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5220

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5221

'channel_follower_count': int

5222

},

5223

'playlist_mincount': 650,

5224

'params': {

5225

'skip_download': True,

5226

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5227

},

5228

}, {

5229

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5230

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5231

'info_dict': {

5232

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5233

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5234

'title': 'Uploads from Royalty Free Music - Topic',

5235

'uploader': 'Royalty Free Music - Topic',

5236

'modified_date': r're:\d{8}',

5237

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5238

'description': '',

5239

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5240

'tags': [],

5241

'channel': 'Royalty Free Music - Topic',

5242

'view_count': int,

5243

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5244

},

5245

'expected_warnings': [

5246

'does not have a videos tab',

5247

r'[Uu]navailable videos (are|will be) hidden',

5248

],

5249

'playlist_mincount': 101,

5250

'params': {

5251

'skip_download': True,

5252

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5253

},

5254

}, {

5255

'note': 'non-standard redirect to regional channel',

5256

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5257

'only_matching': True

5258

}, {

5259

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5260

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5261

'info_dict': {

5262

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5263

'modified_date': '20220407',

5264

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5265

'tags': [],

5266

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5267

'uploader': 'pukkandan',

5268

'availability': 'unlisted',

5269

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5270

'channel': 'pukkandan',

5271

'description': 'Test for collaborative playlist',

5272

'title': 'yt-dlp test - collaborative playlist',

5273

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5274

},

5275

'playlist_mincount': 2

}]

@classmethod
def suitable(cls, url):
    """Decline any URL that YoutubeIE accepts; otherwise defer to the parent check."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5281

5282

# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional '/<word>' path segment; the conditional group only tries
#          to match it when the `not_channel` group did not participate
#          (`not_channel` is presumably defined inside _VALID_URL - confirm there)
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5283

5284

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Normalise the URL, follow channel/playlist redirects and dispatch extraction.

    The URL is rewritten onto www.youtube.com and split with ``_URL_RE``.
    Depending on what the downloaded data contains, the result comes from
    ``_extract_from_tabs``, ``_extract_from_playlist`` or a ``url_result``
    pointing at a single video or another tab URL.

    Raises ExtractorError when the page cannot be recognised, when an
    explicitly requested tab does not exist, or when a /live tab did not
    redirect to a video.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    follow_redirects = 'no-youtube-channel-redirect' not in compat_opts

    def split_url(target):
        # Groups that did not participate in the match are reported as '' instead of None
        groups = self._URL_RE.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    mobj, redirect_warning = split_url(url), None
    pre, post = mobj['pre'], mobj['post']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':  # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
        elif id_prefix == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    original_tab_name = tab
    if is_channel and not tab and follow_redirects:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = split_url(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and follow_redirects:
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        if follow_redirects:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab was asked for explicitly, so there is nothing to fall back to
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so the tab list is looked up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return self.url_result(
        f'https://www.youtube.com/watch?v={video_id}',
        ie=YoutubeIE.ie_key(), video_id=video_id)

5413

5414

5415

class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist IDs and ``list=`` URLs and hand them to YoutubeTabIE."""

    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs YoutubeTabIE accepts or that carry a ``v`` query value."""
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs is already imported at module level; no function-scope re-import needed
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rebuild the input as a /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Checked before `url` is rewritten onto www.youtube.com below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            # A bare playlist ID has no query string, so fall back to list=<id>
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5524

5525

5526

class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be short links that carry a ``list=`` parameter into watch URLs."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch?v=...&list=... URL and pass it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5573

5574

5575

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map /embed/live_stream?channel=<id> URLs to the channel's /live tab."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's /live URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5588

5589

5590

class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into the user's /videos URL."""

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/<name>/videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5604

5605

5606

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` keyword family to the ``LL`` playlist URL."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the input keyword itself carries no data."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

5623

5624

5625

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Playlist of the entries in the notification menu (``:ytnotif`` keyword)."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield an entry per notification item found in *response*.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is reset to None, then set to the last
        ``continuationItemRenderer`` encountered (if any).
        """
        first_page_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
        next_page_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, first_page_path, next_page_path, expected_type=list) or []

        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Build a ``url`` result for one notification renderer.

        Returns None when the notification has no video ID and no
        channel-ID/post-ID pair can be extracted from its browse endpoint.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            sent_time = self._extract_time_text(notification, 'sentTimeText')[0]
            upload_date = strftime_or_none(sent_time, '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries page by page until no continuation item is reported."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a playlist with id and title 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)

5714

5715

5716

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key."""

    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle /results and /search URLs, forwarding any ``sp`` parameter to the search."""

    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            'entries': [{
                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                'title': '#cats',
            }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results, using the query text as id and title."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q` when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

5784

5785

5786

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs, optionally limited to one result section."""

    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment) -> value of the `sp` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of music search results titled '<query>[ - <section>]'."""
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Reverse lookup: name the section when `sp` is a known value, else show `sp` itself
            section = next(
                (name for name, value in self._SECTIONS.items() if value == params), params)
        else:
            # No `sp` given: read the section name from the URL fragment instead
            fragment = (url.split('#') + [''])[1]
            section = compat_urllib_parse_unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

5837

5838

5839

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    Each subclass is expected to override _FEED_NAME.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        # Called through the class because this base does not inherit YoutubeBaseInfoExtractor
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        # Derived from _FEED_NAME, so a subclass only has to set that attribute
        feed = cls._FEED_NAME
        return f'youtube:{feed}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/<name> URL; the input URL is not inspected."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5857

5858

5859

class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` keyword to the ``WL`` playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the input keyword itself carries no data."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5871

5872

5873

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the bare youtube.com URL and the ``:ytrec`` keyword."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # overrides the base-class value of True
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ytsubs`` keyword family."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed (":ythis" / ":ythistory")."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]

class YoutubeStoriesIE(InfoExtractor):
    """Turn a "ytstories:UC..." pseudo-URL into a playlist URL.

    The part of the channel ID after the "UC" prefix is captured as the id
    and prefixed with "RLTD" to form the playlist ID that is handed to the
    tab extractor.
    """

    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', 'only_matching': True},
    ]

    def _real_extract(self, url):
        channel_suffix = self._match_id(url)
        stories_list_id = f'RLTD{channel_suffix}'
        stories_url = f'https://www.youtube.com/playlist?list={stories_list_id}&playnext=1'
        return self.url_result(stories_url, ie=YoutubeTabIE, video_id=stories_list_id)

5927

5928

5929

class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lost their video ID.

    The pattern matches YouTube URLs whose query string is empty or holds
    only a single non-ID parameter (feature, annotation_id, x-yt-cl, hl, t)
    or an attribution_link with only its first parameter. Extraction never
    succeeds; it always raises an expected ExtractorError explaining that
    the URL probably needs to be quoted.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with a hint about quoting the URL.

        Raises:
            ExtractorError: unconditionally, marked as expected so it is
                reported as a user error rather than a bug.
        """
        # The example commands name this program (yt-dlp); the previous text
        # pointed users at the youtube-dl executable instead.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc.',
            expected=True)

class YoutubeClipIE(InfoExtractor):
    """Placeholder for youtube.com/clip/ URLs.

    Emits a warning and passes the unchanged URL on to the 'Generic'
    extractor.
    """

    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        unsupported_notice = (
            'YouTube clips are not currently supported. '
            'The entire video will be downloaded instead')
        self.report_warning(unsupported_notice)
        return self.url_result(url, 'Generic')

5986

5987

5988

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose `v` parameter is only 1-10 characters long.

    Extraction always raises an expected ExtractorError reporting the
    incomplete ID.
    """

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        partial_id = self._match_id(url)
        # expected=True marks this as a user-side problem, not an extractor bug.
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)