jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import collections
	4	import copy
	5	import datetime as dt
	6	import enum
	7	import hashlib
	8	import itertools
	9	import json
	10	import math
	11	import os.path
	12	import random
	13	import re
	14	import shlex
	15	import sys
	16	import threading
	17	import time
	18	import traceback
	19	import urllib.parse
	20
	21	from .common import InfoExtractor, SearchInfoExtractor
	22	from .openload import PhantomJSwrapper
	23	from ..compat import functools
	24	from ..jsinterp import JSInterpreter
	25	from ..networking.exceptions import HTTPError, network_exceptions
	26	from ..utils import (
	27	NO_DEFAULT,
	28	ExtractorError,
	29	LazyList,
	30	UserNotLive,
	31	bug_reports_message,
	32	classproperty,
	33	clean_html,
	34	datetime_from_str,
	35	dict_get,
	36	filesize_from_tbr,
	37	filter_dict,
	38	float_or_none,
	39	format_field,
	40	get_first,
	41	int_or_none,
	42	is_html,
	43	join_nonempty,
	44	js_to_json,
	45	mimetype2ext,
	46	orderedSet,
	47	parse_codecs,
	48	parse_count,
	49	parse_duration,
	50	parse_iso8601,
	51	parse_qs,
	52	qualities,
	53	remove_start,
	54	smuggle_url,
	55	str_or_none,
	56	str_to_int,
	57	strftime_or_none,
	58	traverse_obj,
	59	try_call,
	60	try_get,
	61	unescapeHTML,
	62	unified_strdate,
	63	unified_timestamp,
	64	unsmuggle_url,
	65	update_url_query,
	66	url_or_none,
	67	urljoin,
	68	variadic,
	69	)
	70
	71	STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
	72	# any clients starting with _ cannot be explicitly requested by the user
	73	INNERTUBE_CLIENTS = {
	74	'web': {
	75	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	76	'INNERTUBE_CONTEXT': {
	77	'client': {
	78	'clientName': 'WEB',
	79	'clientVersion': '2.20220801.00.00',
	80	}
	81	},
	82	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	83	},
	84	'web_embedded': {
	85	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	86	'INNERTUBE_CONTEXT': {
	87	'client': {
	88	'clientName': 'WEB_EMBEDDED_PLAYER',
	89	'clientVersion': '1.20220731.00.00',
	90	},
	91	},
	92	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	93	},
	94	'web_music': {
	95	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	96	'INNERTUBE_HOST': 'music.youtube.com',
	97	'INNERTUBE_CONTEXT': {
	98	'client': {
	99	'clientName': 'WEB_REMIX',
	100	'clientVersion': '1.20220727.01.00',
	101	}
	102	},
	103	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	104	},
	105	'web_creator': {
	106	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	107	'INNERTUBE_CONTEXT': {
	108	'client': {
	109	'clientName': 'WEB_CREATOR',
	110	'clientVersion': '1.20220726.00.00',
	111	}
	112	},
	113	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	114	},
	115	'android': {
	116	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	117	'INNERTUBE_CONTEXT': {
	118	'client': {
	119	'clientName': 'ANDROID',
	120	'clientVersion': '19.09.37',
	121	'androidSdkVersion': 30,
	122	'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip'
	123	}
	124	},
	125	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	126	'REQUIRE_JS_PLAYER': False
	127	},
	128	'android_embedded': {
	129	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	130	'INNERTUBE_CONTEXT': {
	131	'client': {
	132	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	133	'clientVersion': '19.09.37',
	134	'androidSdkVersion': 30,
	135	'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip'
	136	},
	137	},
	138	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	139	'REQUIRE_JS_PLAYER': False
	140	},
	141	'android_music': {
	142	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	143	'INNERTUBE_CONTEXT': {
	144	'client': {
	145	'clientName': 'ANDROID_MUSIC',
	146	'clientVersion': '6.42.52',
	147	'androidSdkVersion': 30,
	148	'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip'
	149	}
	150	},
	151	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	152	'REQUIRE_JS_PLAYER': False
	153	},
	154	'android_creator': {
	155	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	156	'INNERTUBE_CONTEXT': {
	157	'client': {
	158	'clientName': 'ANDROID_CREATOR',
	159	'clientVersion': '22.30.100',
	160	'androidSdkVersion': 30,
	161	'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
	162	},
	163	},
	164	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	165	'REQUIRE_JS_PLAYER': False
	166	},
	167	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	168	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	169	'ios': {
	170	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	171	'INNERTUBE_CONTEXT': {
	172	'client': {
	173	'clientName': 'IOS',
	174	'clientVersion': '19.09.3',
	175	'deviceModel': 'iPhone14,3',
	176	'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	177	}
	178	},
	179	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	180	'REQUIRE_JS_PLAYER': False
	181	},
	182	'ios_embedded': {
	183	'INNERTUBE_CONTEXT': {
	184	'client': {
	185	'clientName': 'IOS_MESSAGES_EXTENSION',
	186	'clientVersion': '19.09.3',
	187	'deviceModel': 'iPhone14,3',
	188	'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	189	},
	190	},
	191	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	192	'REQUIRE_JS_PLAYER': False
	193	},
	194	'ios_music': {
	195	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	196	'INNERTUBE_CONTEXT': {
	197	'client': {
	198	'clientName': 'IOS_MUSIC',
	199	'clientVersion': '6.33.3',
	200	'deviceModel': 'iPhone14,3',
	201	'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	202	},
	203	},
	204	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	205	'REQUIRE_JS_PLAYER': False
	206	},
	207	'ios_creator': {
	208	'INNERTUBE_CONTEXT': {
	209	'client': {
	210	'clientName': 'IOS_CREATOR',
	211	'clientVersion': '22.33.101',
	212	'deviceModel': 'iPhone14,3',
	213	'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	214	},
	215	},
	216	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	217	'REQUIRE_JS_PLAYER': False
	218	},
	219	# mweb has 'ultralow' formats
	220	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	221	'mweb': {
	222	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	223	'INNERTUBE_CONTEXT': {
	224	'client': {
	225	'clientName': 'MWEB',
	226	'clientVersion': '2.20220801.00.00',
	227	}
	228	},
	229	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	230	},
	231	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	232	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	233	'tv_embedded': {
	234	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	235	'INNERTUBE_CONTEXT': {
	236	'client': {
	237	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	238	'clientVersion': '2.0',
	239	},
	240	},
	241	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	242	},
	243	# This client has pre-merged video+audio 720p/1080p streams
	244	'mediaconnect': {
	245	'INNERTUBE_CONTEXT': {
	246	'client': {
	247	'clientName': 'MEDIA_CONNECT_FRONTEND',
	248	'clientVersion': '0.1',
	249	},
	250	},
	251	'INNERTUBE_CONTEXT_CLIENT_NAME': 95
	252	},
	253	}
	254
	255
	256	def _split_innertube_client(client_name):
	257	variant, *base = client_name.rsplit('.', 1)
	258	if base:
	259	return variant, base[0], variant
	260	base, *variant = client_name.split('_', 1)
	261	return client_name, base, variant[0] if variant else None
	262
	263
	264	def short_client_name(client_name):
	265	main, *parts = _split_innertube_client(client_name)[0].replace('embedscreen', 'e_s').split('_')
	266	return join_nonempty(main[:4], ''.join(x[0] for x in parts)).upper()
	267
	268
	269	def build_innertube_clients():
	270	THIRD_PARTY = {
	271	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	272	}
	273	BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb')
	274	priority = qualities(BASE_CLIENTS[::-1])
	275
	276	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	277	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	278	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	279	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	280	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	281
	282	_, base_client, variant = _split_innertube_client(client)
	283	ytcfg['priority'] = 10 * priority(base_client)
	284
	285	if not variant:
	286	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	287	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	288	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	289	embedscreen['priority'] -= 3
	290	elif variant == 'embedded':
	291	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	292	ytcfg['priority'] -= 2
	293	else:
	294	ytcfg['priority'] -= 3
	295
	296
	297	build_innertube_clients()
	298
	299
	300	class BadgeType(enum.Enum):
	301	AVAILABILITY_UNLISTED = enum.auto()
	302	AVAILABILITY_PRIVATE = enum.auto()
	303	AVAILABILITY_PUBLIC = enum.auto()
	304	AVAILABILITY_PREMIUM = enum.auto()
	305	AVAILABILITY_SUBSCRIPTION = enum.auto()
	306	LIVE_NOW = enum.auto()
	307	VERIFIED = enum.auto()
	308
	309
	310	class YoutubeBaseInfoExtractor(InfoExtractor):
	311	"""Provide base functions for Youtube extractors"""
	312
	313	_RESERVED_NAMES = (
	314	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|live\|watch_popup\|clip\|'
	315	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	316	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|source\|'
	317	r'storefront\|oops\|index\|account\|t/terms\|about\|upload\|signin\|logout')
	318
	319	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	320
	321	# _NETRC_MACHINE = 'youtube'
	322
	323	# If True it will raise an error if no login info is provided
	324	_LOGIN_REQUIRED = False
	325
	326	_INVIDIOUS_SITES = (
	327	# invidious-redirect websites
	328	r'(?:www\.)?redirect\.invidious\.io',
	329	r'(?:(?:www\|dev)\.)?invidio\.us',
	330	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	331	r'(?:www\.)?invidious\.pussthecat\.org',
	332	r'(?:www\.)?invidious\.zee\.li',
	333	r'(?:www\.)?invidious\.ethibox\.fr',
	334	r'(?:www\.)?iv\.ggtyler\.dev',
	335	r'(?:www\.)?inv\.vern\.i2p',
	336	r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
	337	r'(?:www\.)?inv\.riverside\.rocks',
	338	r'(?:www\.)?invidious\.silur\.me',
	339	r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
	340	r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
	341	r'(?:www\.)?invidious\.slipfox\.xyz',
	342	r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
	343	r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
	344	r'(?:www\.)?invidious\.tiekoetter\.com',
	345	r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
	346	r'(?:www\.)?invidious\.nerdvpn\.de',
	347	r'(?:www\.)?invidious\.weblibre\.org',
	348	r'(?:www\.)?inv\.odyssey346\.dev',
	349	r'(?:www\.)?invidious\.dhusch\.de',
	350	r'(?:www\.)?iv\.melmac\.space',
	351	r'(?:www\.)?watch\.thekitty\.zone',
	352	r'(?:www\.)?invidious\.privacydev\.net',
	353	r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
	354	r'(?:www\.)?invidious\.drivet\.xyz',
	355	r'(?:www\.)?vid\.priv\.au',
	356	r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
	357	r'(?:www\.)?inv\.vern\.cc',
	358	r'(?:www\.)?invidious\.esmailelbob\.xyz',
	359	r'(?:www\.)?invidious\.sethforprivacy\.com',
	360	r'(?:www\.)?yt\.oelrichsgarcia\.de',
	361	r'(?:www\.)?yt\.artemislena\.eu',
	362	r'(?:www\.)?invidious\.flokinet\.to',
	363	r'(?:www\.)?invidious\.baczek\.me',
	364	r'(?:www\.)?y\.com\.sb',
	365	r'(?:www\.)?invidious\.epicsite\.xyz',
	366	r'(?:www\.)?invidious\.lidarshield\.cloud',
	367	r'(?:www\.)?yt\.funami\.tech',
	368	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	369	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	370	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	371	# youtube-dl invidious instances list
	372	r'(?:(?:www\|no)\.)?invidiou\.sh',
	373	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	374	r'(?:www\.)?invidious\.kabi\.tk',
	375	r'(?:www\.)?invidious\.mastodon\.host',
	376	r'(?:www\.)?invidious\.zapashcanon\.fr',
	377	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	378	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	379	r'(?:www\.)?invidious\.himiko\.cloud',
	380	r'(?:www\.)?invidious\.reallyancient\.tech',
	381	r'(?:www\.)?invidious\.tube',
	382	r'(?:www\.)?invidiou\.site',
	383	r'(?:www\.)?invidious\.site',
	384	r'(?:www\.)?invidious\.xyz',
	385	r'(?:www\.)?invidious\.nixnet\.xyz',
	386	r'(?:www\.)?invidious\.048596\.xyz',
	387	r'(?:www\.)?invidious\.drycat\.fr',
	388	r'(?:www\.)?inv\.skyn3t\.in',
	389	r'(?:www\.)?tube\.poal\.co',
	390	r'(?:www\.)?tube\.connect\.cafe',
	391	r'(?:www\.)?vid\.wxzm\.sx',
	392	r'(?:www\.)?vid\.mint\.lgbt',
	393	r'(?:www\.)?vid\.puffyan\.us',
	394	r'(?:www\.)?yewtu\.be',
	395	r'(?:www\.)?yt\.elukerio\.org',
	396	r'(?:www\.)?yt\.lelux\.fi',
	397	r'(?:www\.)?invidious\.ggc-project\.de',
	398	r'(?:www\.)?yt\.maisputain\.ovh',
	399	r'(?:www\.)?ytprivate\.com',
	400	r'(?:www\.)?invidious\.13ad\.de',
	401	r'(?:www\.)?invidious\.toot\.koeln',
	402	r'(?:www\.)?invidious\.fdn\.fr',
	403	r'(?:www\.)?watch\.nettohikari\.com',
	404	r'(?:www\.)?invidious\.namazso\.eu',
	405	r'(?:www\.)?invidious\.silkky\.cloud',
	406	r'(?:www\.)?invidious\.exonip\.de',
	407	r'(?:www\.)?invidious\.riverside\.rocks',
	408	r'(?:www\.)?invidious\.blamefran\.net',
	409	r'(?:www\.)?invidious\.moomoo\.de',
	410	r'(?:www\.)?ytb\.trom\.tf',
	411	r'(?:www\.)?yt\.cyberhost\.uk',
	412	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	413	r'(?:www\.)?qklhadlycap4cnod\.onion',
	414	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	415	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	416	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	417	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	418	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	419	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	420	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	421	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	422	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	423	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	424	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	425	r'(?:www\.)?piped\.kavin\.rocks',
	426	r'(?:www\.)?piped\.tokhmi\.xyz',
	427	r'(?:www\.)?piped\.syncpundit\.io',
	428	r'(?:www\.)?piped\.mha\.fi',
	429	r'(?:www\.)?watch\.whatever\.social',
	430	r'(?:www\.)?piped\.garudalinux\.org',
	431	r'(?:www\.)?piped\.rivo\.lol',
	432	r'(?:www\.)?piped-libre\.kavin\.rocks',
	433	r'(?:www\.)?yt\.jae\.fi',
	434	r'(?:www\.)?piped\.mint\.lgbt',
	435	r'(?:www\.)?il\.ax',
	436	r'(?:www\.)?piped\.esmailelbob\.xyz',
	437	r'(?:www\.)?piped\.projectsegfau\.lt',
	438	r'(?:www\.)?piped\.privacydev\.net',
	439	r'(?:www\.)?piped\.palveluntarjoaja\.eu',
	440	r'(?:www\.)?piped\.smnz\.de',
	441	r'(?:www\.)?piped\.adminforge\.de',
	442	r'(?:www\.)?watch\.whatevertinfoil\.de',
	443	r'(?:www\.)?piped\.qdi\.fi',
	444	r'(?:(?:www\|cf)\.)?piped\.video',
	445	r'(?:www\.)?piped\.aeong\.one',
	446	r'(?:www\.)?piped\.moomoo\.me',
	447	r'(?:www\.)?piped\.chauvet\.pro',
	448	r'(?:www\.)?watch\.leptons\.xyz',
	449	r'(?:www\.)?pd\.vern\.cc',
	450	r'(?:www\.)?piped\.hostux\.net',
	451	r'(?:www\.)?piped\.lunar\.icu',
	452	# Hyperpipe instances from https://hyperpipe.codeberg.page/
	453	r'(?:www\.)?hyperpipe\.surge\.sh',
	454	r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
	455	r'(?:www\.)?listen\.whatever\.social',
	456	r'(?:www\.)?music\.adminforge\.de',
	457	)
	458
	459	# extracted from account/account_menu ep
	460	# XXX: These are the supported YouTube UI and API languages,
	461	# which is slightly different from languages supported for translation in YouTube studio
	462	_SUPPORTED_LANG_CODES = [
	463	'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
	464	'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
	465	'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
	466	'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
	467	'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
	468	'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
	469	]
	470
	471	_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
	472
	473	_YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
	474	_YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
	475
	476	def ucid_or_none(self, ucid):
	477	return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None)
	478
	479	def handle_or_none(self, handle):
	480	return self._search_regex(rf'^({self._YT_HANDLE_RE})$', handle, '@-handle', default=None)
	481
	482	def handle_from_url(self, url):
	483	return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})',
	484	url, 'channel handle', default=None)
	485
	486	def ucid_from_url(self, url):
	487	return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})',
	488	url, 'channel id', default=None)
	489
	490	@functools.cached_property
	491	def _preferred_lang(self):
	492	"""
	493	Returns a language code supported by YouTube for the user preferred language.
	494	Returns None if no preferred language set.
	495	"""
	496	preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
	497	if not preferred_lang:
	498	return
	499	if preferred_lang not in self._SUPPORTED_LANG_CODES:
	500	raise ExtractorError(

1

import base64

import calendar

import collections

import copy

import datetime as dt

import enum

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import shlex

import sys

import threading

import time

import traceback

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

22

from .openload import PhantomJSwrapper

23

from ..compat import functools

24

from ..jsinterp import JSInterpreter

25

from ..networking.exceptions import HTTPError, network_exceptions

26

from ..utils import (

NO_DEFAULT,

ExtractorError,

LazyList,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

filesize_from_tbr,

filter_dict,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_call,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'

72

# any clients starting with _ cannot be explicitly requested by the user

73

INNERTUBE_CLIENTS = {

74

'web': {

75

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

76

'INNERTUBE_CONTEXT': {

77

'client': {

78

'clientName': 'WEB',

79

'clientVersion': '2.20220801.00.00',

80

}

81

},

82

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

83

},

84

'web_embedded': {

85

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

86

'INNERTUBE_CONTEXT': {

87

'client': {

88

'clientName': 'WEB_EMBEDDED_PLAYER',

89

'clientVersion': '1.20220731.00.00',

90

},

91

},

92

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

93

},

94

'web_music': {

95

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

96

'INNERTUBE_HOST': 'music.youtube.com',

97

'INNERTUBE_CONTEXT': {

98

'client': {

99

'clientName': 'WEB_REMIX',

100

'clientVersion': '1.20220727.01.00',

101

}

102

},

103

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

104

},

105

'web_creator': {

106

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

107

'INNERTUBE_CONTEXT': {

108

'client': {

109

'clientName': 'WEB_CREATOR',

110

'clientVersion': '1.20220726.00.00',

111

}

112

},

113

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

114

},

115

'android': {

116

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

117

'INNERTUBE_CONTEXT': {

118

'client': {

119

'clientName': 'ANDROID',

120

'clientVersion': '19.09.37',

121

'androidSdkVersion': 30,

122

'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip'

123

}

124

},

125

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

126

'REQUIRE_JS_PLAYER': False

127

},

128

'android_embedded': {

129

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

130

'INNERTUBE_CONTEXT': {

131

'client': {

132

'clientName': 'ANDROID_EMBEDDED_PLAYER',

133

'clientVersion': '19.09.37',

134

'androidSdkVersion': 30,

135

'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip'

136

},

137

},

138

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

139

'REQUIRE_JS_PLAYER': False

140

},

141

'android_music': {

142

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

143

'INNERTUBE_CONTEXT': {

144

'client': {

145

'clientName': 'ANDROID_MUSIC',

146

'clientVersion': '6.42.52',

147

'androidSdkVersion': 30,

148

'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip'

149

}

150

},

151

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

152

'REQUIRE_JS_PLAYER': False

153

},

154

'android_creator': {

155

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

156

'INNERTUBE_CONTEXT': {

157

'client': {

158

'clientName': 'ANDROID_CREATOR',

159

'clientVersion': '22.30.100',

160

'androidSdkVersion': 30,

161

'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'

162

},

163

},

164

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

165

'REQUIRE_JS_PLAYER': False

166

},

167

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

168

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

169

'ios': {

170

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

171

'INNERTUBE_CONTEXT': {

172

'client': {

173

'clientName': 'IOS',

174

'clientVersion': '19.09.3',

175

'deviceModel': 'iPhone14,3',

176

'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

177

}

178

},

179

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

180

'REQUIRE_JS_PLAYER': False

181

},

182

'ios_embedded': {

183

'INNERTUBE_CONTEXT': {

184

'client': {

185

'clientName': 'IOS_MESSAGES_EXTENSION',

186

'clientVersion': '19.09.3',

187

'deviceModel': 'iPhone14,3',

188

'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

189

},

190

},

191

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

192

'REQUIRE_JS_PLAYER': False

193

},

194

'ios_music': {

195

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

196

'INNERTUBE_CONTEXT': {

197

'client': {

198

'clientName': 'IOS_MUSIC',

199

'clientVersion': '6.33.3',

200

'deviceModel': 'iPhone14,3',

201

'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

202

},

203

},

204

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

205

'REQUIRE_JS_PLAYER': False

206

},

207

'ios_creator': {

208

'INNERTUBE_CONTEXT': {

209

'client': {

210

'clientName': 'IOS_CREATOR',

211

'clientVersion': '22.33.101',

212

'deviceModel': 'iPhone14,3',

213

'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

214

},

215

},

216

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

217

'REQUIRE_JS_PLAYER': False

218

},

219

# mweb has 'ultralow' formats

220

# See: https://github.com/yt-dlp/yt-dlp/pull/557

221

'mweb': {

222

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

223

'INNERTUBE_CONTEXT': {

224

'client': {

225

'clientName': 'MWEB',

226

'clientVersion': '2.20220801.00.00',

227

}

228

},

229

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

230

},

231

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

232

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

233

'tv_embedded': {

234

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

235

'INNERTUBE_CONTEXT': {

236

'client': {

237

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

238

'clientVersion': '2.0',

239

},

240

},

241

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

242

},

243

# This client has pre-merged video+audio 720p/1080p streams

244

'mediaconnect': {

245

'INNERTUBE_CONTEXT': {

246

'client': {

247

'clientName': 'MEDIA_CONNECT_FRONTEND',

248

'clientVersion': '0.1',

249

},

250

},

251

'INNERTUBE_CONTEXT_CLIENT_NAME': 95

},

}

def _split_innertube_client(client_name):

257

variant, *base = client_name.rsplit('.', 1)

258

if base:

259

return variant, base[0], variant

260

base, *variant = client_name.split('_', 1)

261

return client_name, base, variant[0] if variant else None

262

263

264

def short_client_name(client_name):
    """Return an upper-cased abbreviation of an innertube client name.

    The first element returned by ``_split_innertube_client`` is split on
    underscores (with ``'embedscreen'`` first rewritten to ``'e_s'`` so that it
    contributes two initials). The abbreviation combines at most the first four
    characters of the leading segment with the first character of every
    following segment, joined through ``join_nonempty``.
    """
    full_name = _split_innertube_client(client_name)[0]
    main, *parts = full_name.replace('embedscreen', 'e_s').split('_')
    initials = ''.join(part[0] for part in parts)
    return join_nonempty(main[:4], initials).upper()

267

268

269

def build_innertube_clients():
    """Fill in defaults and priorities for every entry of INNERTUBE_CLIENTS.

    For each client present when the function is called, this:

    * sets default values for 'INNERTUBE_API_KEY', 'INNERTUBE_HOST',
      'REQUIRE_JS_PLAYER' and the context's 'hl' where they are missing;
    * assigns 'priority' as ten times the rank of the client's base name,
      minus 2 for an 'embedded' variant or minus 3 for any other variant;
    * for clients without a variant, registers an extra
      '<client>_embedscreen' entry (a deep copy with clientScreen 'EMBED',
      a 'thirdParty' context and priority lowered by 3).

    INNERTUBE_CLIENTS is modified in place; nothing is returned.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('ios', 'android', 'web', 'tv', 'mweb')
    # The tuple is reversed before ranking, so names listed earlier above
    # receive the higher rank.
    rank = qualities(tuple(reversed(base_clients)))

    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: the loop body adds '<client>_embedscreen'
    # entries to INNERTUBE_CLIENTS, and those must not be visited themselves.
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for key, value in defaults:
            cfg.setdefault(key, value)
        context = cfg['INNERTUBE_CONTEXT']
        context['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        cfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            context['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            # No variant: derive the embed-screen flavour from the fully
            # defaulted config. The copy is taken before 'thirdParty' is set,
            # so only the derived entry receives it.
            screen_cfg = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg
            screen_context = screen_cfg['INNERTUBE_CONTEXT']
            screen_context['client']['clientScreen'] = 'EMBED'
            screen_context['thirdParty'] = third_party
            screen_cfg['priority'] -= 3

295

296

297

build_innertube_clients()

298

299

300

@enum.unique
class BadgeType(enum.Enum):
    """Enumeration of badge kinds.

    Values are assigned automatically in declaration order, starting at 1.
    """

    # Availability-related badges
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Status badges
    LIVE_NOW = enum.auto()
    VERIFIED = enum.auto()

308

309

310

class YoutubeBaseInfoExtractor(InfoExtractor):

311

"""Provide base functions for Youtube extractors"""

_RESERVED_NAMES = (

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

320

321

# _NETRC_MACHINE = 'youtube'

322

323

# If True it will raise an error if no login info is provided

324

_LOGIN_REQUIRED = False

325

326

_INVIDIOUS_SITES = (

327

# invidious-redirect websites

328

r'(?:www\.)?redirect\.invidious\.io',

329

r'(?:(?:www|dev)\.)?invidio\.us',

330

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

331

r'(?:www\.)?invidious\.pussthecat\.org',

332

r'(?:www\.)?invidious\.zee\.li',

333

r'(?:www\.)?invidious\.ethibox\.fr',

334

r'(?:www\.)?iv\.ggtyler\.dev',

335

r'(?:www\.)?inv\.vern\.i2p',

336

r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',

337

r'(?:www\.)?inv\.riverside\.rocks',

338

r'(?:www\.)?invidious\.silur\.me',

339

r'(?:www\.)?inv\.bp\.projectsegfau\.lt',

340

r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',

341

r'(?:www\.)?invidious\.slipfox\.xyz',

342

r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',

343

r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',

344

r'(?:www\.)?invidious\.tiekoetter\.com',

345

r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',

346

r'(?:www\.)?invidious\.nerdvpn\.de',

347

r'(?:www\.)?invidious\.weblibre\.org',

348

r'(?:www\.)?inv\.odyssey346\.dev',

349

r'(?:www\.)?invidious\.dhusch\.de',

350

r'(?:www\.)?iv\.melmac\.space',

351

r'(?:www\.)?watch\.thekitty\.zone',

352

r'(?:www\.)?invidious\.privacydev\.net',

353

r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',

354

r'(?:www\.)?invidious\.drivet\.xyz',

355

r'(?:www\.)?vid\.priv\.au',

356

r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',

357

r'(?:www\.)?inv\.vern\.cc',

358

r'(?:www\.)?invidious\.esmailelbob\.xyz',

359

r'(?:www\.)?invidious\.sethforprivacy\.com',

360

r'(?:www\.)?yt\.oelrichsgarcia\.de',

361

r'(?:www\.)?yt\.artemislena\.eu',

362

r'(?:www\.)?invidious\.flokinet\.to',

363

r'(?:www\.)?invidious\.baczek\.me',

364

r'(?:www\.)?y\.com\.sb',

365

r'(?:www\.)?invidious\.epicsite\.xyz',

366

r'(?:www\.)?invidious\.lidarshield\.cloud',

367

r'(?:www\.)?yt\.funami\.tech',

368

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

369

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

370

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

371

# youtube-dl invidious instances list

372

r'(?:(?:www|no)\.)?invidiou\.sh',

373

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

374

r'(?:www\.)?invidious\.kabi\.tk',

375

r'(?:www\.)?invidious\.mastodon\.host',

376

r'(?:www\.)?invidious\.zapashcanon\.fr',

377

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

378

r'(?:www\.)?invidious\.tinfoil-hat\.net',

379

r'(?:www\.)?invidious\.himiko\.cloud',

380

r'(?:www\.)?invidious\.reallyancient\.tech',

381

r'(?:www\.)?invidious\.tube',

382

r'(?:www\.)?invidiou\.site',

383

r'(?:www\.)?invidious\.site',

384

r'(?:www\.)?invidious\.xyz',

385

r'(?:www\.)?invidious\.nixnet\.xyz',

386

r'(?:www\.)?invidious\.048596\.xyz',

387

r'(?:www\.)?invidious\.drycat\.fr',

388

r'(?:www\.)?inv\.skyn3t\.in',

389

r'(?:www\.)?tube\.poal\.co',

390

r'(?:www\.)?tube\.connect\.cafe',

391

r'(?:www\.)?vid\.wxzm\.sx',

392

r'(?:www\.)?vid\.mint\.lgbt',

393

r'(?:www\.)?vid\.puffyan\.us',

394

r'(?:www\.)?yewtu\.be',

395

r'(?:www\.)?yt\.elukerio\.org',

396

r'(?:www\.)?yt\.lelux\.fi',

397

r'(?:www\.)?invidious\.ggc-project\.de',

398

r'(?:www\.)?yt\.maisputain\.ovh',

399

r'(?:www\.)?ytprivate\.com',

400

r'(?:www\.)?invidious\.13ad\.de',

401

r'(?:www\.)?invidious\.toot\.koeln',

402

r'(?:www\.)?invidious\.fdn\.fr',

403

r'(?:www\.)?watch\.nettohikari\.com',

404

r'(?:www\.)?invidious\.namazso\.eu',

405

r'(?:www\.)?invidious\.silkky\.cloud',

406

r'(?:www\.)?invidious\.exonip\.de',

407

r'(?:www\.)?invidious\.riverside\.rocks',

408

r'(?:www\.)?invidious\.blamefran\.net',

409

r'(?:www\.)?invidious\.moomoo\.de',

410

r'(?:www\.)?ytb\.trom\.tf',

411

r'(?:www\.)?yt\.cyberhost\.uk',

412

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

413

r'(?:www\.)?qklhadlycap4cnod\.onion',

414

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

415

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

416

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

417

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

418

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

419

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

420

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

421

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

422

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

423

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

424

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

425

r'(?:www\.)?piped\.kavin\.rocks',

426

r'(?:www\.)?piped\.tokhmi\.xyz',

427

r'(?:www\.)?piped\.syncpundit\.io',

428

r'(?:www\.)?piped\.mha\.fi',

429

r'(?:www\.)?watch\.whatever\.social',

430

r'(?:www\.)?piped\.garudalinux\.org',

431

r'(?:www\.)?piped\.rivo\.lol',

432

r'(?:www\.)?piped-libre\.kavin\.rocks',

433

r'(?:www\.)?yt\.jae\.fi',

434

r'(?:www\.)?piped\.mint\.lgbt',

435

r'(?:www\.)?il\.ax',

436

r'(?:www\.)?piped\.esmailelbob\.xyz',

437

r'(?:www\.)?piped\.projectsegfau\.lt',

438

r'(?:www\.)?piped\.privacydev\.net',

439

r'(?:www\.)?piped\.palveluntarjoaja\.eu',

440

r'(?:www\.)?piped\.smnz\.de',

441

r'(?:www\.)?piped\.adminforge\.de',

442

r'(?:www\.)?watch\.whatevertinfoil\.de',

443

r'(?:www\.)?piped\.qdi\.fi',

444

r'(?:(?:www|cf)\.)?piped\.video',

445

r'(?:www\.)?piped\.aeong\.one',

446

r'(?:www\.)?piped\.moomoo\.me',

447

r'(?:www\.)?piped\.chauvet\.pro',

448

r'(?:www\.)?watch\.leptons\.xyz',

449

r'(?:www\.)?pd\.vern\.cc',

450

r'(?:www\.)?piped\.hostux\.net',

451

r'(?:www\.)?piped\.lunar\.icu',

452

# Hyperpipe instances from https://hyperpipe.codeberg.page/

453

r'(?:www\.)?hyperpipe\.surge\.sh',

454

r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',

455

r'(?:www\.)?listen\.whatever\.social',

456

r'(?:www\.)?music\.adminforge\.de',

457

)

458

459

# extracted from account/account_menu ep

460

# XXX: These are the supported YouTube UI and API languages,

461

# which is slightly different from languages supported for translation in YouTube studio

462

_SUPPORTED_LANG_CODES = [

463

'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',

464

'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',

465

'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',

466

'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',

467

'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',

468

'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'

469

]

470

471

_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}

472

473

_YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en

474

_YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'

475

476

def ucid_or_none(self, ucid):
    """Search `ucid` for a string that is, in full, a channel UC-id (default: None)."""
    full_match_re = rf'^({self._YT_CHANNEL_UCID_RE})$'
    return self._search_regex(full_match_re, ucid, 'UC-id', default=None)

478

479

def handle_or_none(self, handle):
    """Search `handle` for a string that is, in full, an @-handle (default: None)."""
    full_match_re = rf'^({self._YT_HANDLE_RE})$'
    return self._search_regex(full_match_re, handle, '@-handle', default=None)

481

482

def handle_from_url(self, url):
    """Pull the @-handle out of a youtube.com URL or a root-relative path (default: None)."""
    handle_url_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})'
    return self._search_regex(handle_url_re, url, 'channel handle', default=None)

485

486

def ucid_from_url(self, url):
    """Pull the channel UC-id out of a youtube.com URL or a root-relative path (default: None)."""
    ucid_url_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})'
    return self._search_regex(ucid_url_re, url, 'channel id', default=None)

489

490

@functools.cached_property
def _preferred_lang(self):
    """
    Language code taken from the "lang" extractor argument.

    Evaluates to None when the argument is unset or empty.
    Raises ExtractorError when the code is not listed in _SUPPORTED_LANG_CODES,
    and emits a warning for every accepted language other than "en".
    """
    lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not lang:
        return None

    if lang not in self._SUPPORTED_LANG_CODES:
        raise ExtractorError(
            f'Unsupported language code: {lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
            expected=True)

    if lang != 'en':
        self.report_warning(
            f'Preferring "{lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return lang

507

508

def _initialize_consent(self):
    """Set the "accept all" SOCS cookie unless the cookie jar already settles consent."""
    cookies = self._get_cookies('https://www.youtube.com/')
    if cookies.get('__Secure-3PSID'):
        return

    socs = cookies.get('SOCS')
    # An existing SOCS cookie is only overwritten while its value still starts with 'CAA'
    must_set_consent = not socs or socs.value.startswith('CAA')
    if must_set_consent:
        self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True)  # accept all (required for mixes)

516

517

def _initialize_pref(self):
    """Rewrite the PREF cookie so that `hl` and `tz` carry the language and timezone used for extraction."""
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')

    pref = {}
    if pref_cookie:
        try:
            # Keep whatever other preferences the user's cookie already carries
            pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    pref['hl'] = self._preferred_lang or 'en'
    pref['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))

528

529

def _real_initialize(self):
    """Run the initialization steps in order: PREF cookie, consent cookie, login check."""
    initialization_steps = (
        self._initialize_pref,
        self._initialize_consent,
        self._check_login_required,
    )
    for run_step in initialization_steps:
        run_step()

533

534

def _check_login_required(self):
    """Call raise_login_required() when _LOGIN_REQUIRED is set but no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')

537

538

# Regex prefixes matching the assignment of the initial data / player response objects
# (the JSON that follows is matched separately by the caller)
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

540

541

def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for `client`, so callers may mutate it."""
    client_defaults = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(client_defaults)

543

544

def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the INNERTUBE_CLIENTS entry for `client`."""
    client_defaults = INNERTUBE_CLIENTS[client]
    return client_defaults['INNERTUBE_HOST']

546

547

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get() on `ytcfg`, falling back to the default ytcfg of `default_client` on a falsy result."""
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    # Nothing usable in the supplied ytcfg: consult the built-in client defaults instead
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

551

552

def _extract_client_name(self, ytcfg, default_client='web'):
    """Client name from `ytcfg` (top-level key first, then the innertube context), with default fallback."""
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, str, default_client)

556

557

def _extract_client_version(self, ytcfg, default_client='web'):
    """Client version from `ytcfg` (top-level key first, then the innertube context), with default fallback."""
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, str, default_client)

561

562

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname, in order of precedence: the "innertube_host"
    extractor argument, `req_api_hostname`, the host of `default_client` (or 'web').
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

565

566

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """'INNERTUBE_API_KEY' from `ytcfg`, falling back to the default ytcfg of `default_client`."""
    def key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, key_getter, str, default_client)

568

569

def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the first 'INNERTUBE_CONTEXT' dict found in `ytcfg` or in the default
    ytcfg of `default_client`, after forcing language and timezone on its client part.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction
    enforced = {'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0}
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context.update(enforced)
    return context

_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' header value for `origin`.

    The SAPISID value is looked up in the cookies once and remembered in
    self._SAPISID (False when no cookie is available). Returns None if there is no SAPISID.
    """
    time_now = round(time.time())
    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        sapisid_cookie = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (sapisid_cookie and sapisid_cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = sapisid_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    hash_input = f'{time_now} {self._SAPISID} {origin}'.encode()
    sapisidhash = hashlib.sha1(hash_input).hexdigest()
    return f'SAPISIDHASH {time_now}_{sapisidhash}'

603

604

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged with the innertube context) as JSON to the
    /youtubei/v1/<ep> endpoint and return the result of _download_json().

    A "innertube_key" extractor argument takes precedence over `api_key`, which in
    turn takes precedence over the default key of `default_client`.
    """
    if not context:
        context = self._extract_context(default_client=default_client)
    payload = {'context': context}
    payload.update(query)

    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers['content-type'] = 'application/json'
    if headers:
        real_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    api_url = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
    return self._download_json(
        api_url, video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=real_headers,
        query={'key': api_key, 'prettyPrint': 'false'})

621

622

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON object assigned to ytInitialData."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)

624

625

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts to an int, else None.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    """
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in candidates if index is not None), None)

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return 'ID_TOKEN' from `ytcfg` if present, otherwise search for it in `webpage`."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token

    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

646

647

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(source, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)

675

676

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

679

680

def extract_ytcfg(self, video_id, webpage):
    """
    Parse the JSON object passed to `ytcfg.set(...)` in `webpage`.

    @param video_id  id forwarded to _parse_json()
    @param webpage   page contents; may be empty or None
    @returns         the parsed ytcfg dict, or {} when the page is empty,
                     no `ytcfg.set({...});` call is found or the JSON cannot be parsed
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped literals; a bare `$` here
    # would anchor at the end of the input and the pattern could never match
    ytcfg_json = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(ytcfg_json, video_id, fatal=False) or {}

687

688

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Assemble the HTTP headers for an innertube API request.

    Explicitly passed values win over what can be read from `ytcfg`; the result
    is passed through filter_dict() before being returned.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With an account syncid but no known session index, index 0 is sent
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return filter_dict(headers)

713

714

def _download_ytcfg(self, client, video_id):
    """Download the page belonging to `client` and return its ytcfg ({} for clients without a page)."""
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
    }
    page_url = config_pages.get(client)
    if not page_url:
        return {}

    display_name = client.replace("_", " ").strip()
    webpage = self._download_webpage(
        page_url, video_id, fatal=False, note=f'Downloading {display_name} client config')
    return self.extract_ytcfg(video_id, webpage) or {}

725

726

@staticmethod

727

def _build_api_continuation_query(continuation, ctp=None):

728

query = {

729

'continuation': continuation

730

}

731

# TODO: Inconsistency with clickTrackingParams.

732

# Currently we have a fixed ctp contained within context (from ytcfg)

733

# and a ctp in root query for continuation.

734

if ctp:

735

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from the next/reload continuation data of `renderer`, if any."""
    continuation_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, continuation_getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

750

751

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict; None for anything unusable."""
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

760

761

@classmethod
def _extract_continuation(cls, renderer):
    """
    Return a continuation query for `renderer`: the next/reload continuation
    data when present, otherwise the first usable continuationItemRenderer endpoint.
    """
    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, endpoint_path, get_all=False, expected_type=cls._extract_continuation_ep_data)

771

772

@classmethod
def _extract_alerts(cls, data):
    """Yield (alert_type, message) for every alert in data['alerts'] that has both a type and a text."""
    alert_dicts = try_get(data, lambda x: x['alerts'], list) or []
    for alert_dict in alert_dicts:
        if isinstance(alert_dict, dict):
            for alert in alert_dict.values():
                alert_type = alert.get('type')
                # The text is only looked up for alerts that carry a type
                message = cls._get_text(alert, 'text') if alert_type else None
                if message:
                    yield alert_type, message

784

785

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (alert_type, message) pairs as warnings.

    When `fatal` is set, alerts of type "error" are collected separately and the
    last of them is raised as ExtractorError after everything else was reported.
    Messages listed in _IGNORED_WARNINGS are not reported.
    """
    errors, warnings = [], []
    for kind, message in alerts:
        is_fatal_error = kind.lower() == 'error' and fatal
        if is_fatal_error:
            errors.append([kind, message])
        elif message not in self._IGNORED_WARNINGS:
            warnings.append([kind, message])

    # All but the last error are downgraded to warnings; the last one is raised below
    for kind, message in [*warnings, *errors[:-1]]:
        self.report_warning(f'YouTube said: {kind} - {message}', only_once=only_once)
    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

797

798

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and hand them to _report_alerts() with the remaining arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

800

801

def _extract_badges(self, badge_list: list):
    """
    Extract known BadgeType's from a list of badge renderers.
    A badge is identified by its icon type, then its style, then (as a last
    resort) by an English substring of one of its labels.
    @returns [{'type': BadgeType}]
    """
    icon_type_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
        'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
        'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
        'CHECK': BadgeType.VERIFIED,
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
        'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
        'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM,
        'verified': BadgeType.VERIFIED,
        'official artist channel': BadgeType.VERIFIED,
    }

    found = []
    renderers = traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key)))
    for renderer in renderers:
        icon_type = traverse_obj(renderer, ('icon', 'iconType'), expected_type=str)
        known_type = icon_type_map.get(icon_type) or badge_style_map.get(traverse_obj(renderer, 'style'))
        if known_type:
            found.append({'type': known_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(
            renderer, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip',
            get_all=False, expected_type=str, default='').lower()
        # First label_map entry (in declaration order) contained in the label wins
        label_type = next(
            (badge_type for needle, badge_type in label_map.items() if needle in label), None)
        if label_type is not None:
            found.append({'type': label_type})

    return found

@staticmethod
def _has_badge(badges, badge_type):
    """Whether any entry of `badges` has the given `badge_type` as its 'type'."""
    matching = traverse_obj(badges, lambda _, v: v['type'] == badge_type)
    return bool(matching)

856

857

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data` at any of the given paths
    (or in `data` itself when no path is given).

    A text object is read from its 'simpleText', otherwise by joining the 'text'
    of its 'runs' (at most `max_runs` of them). Returns None if nothing is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only branching paths (containing `...` or a list/tuple key) are iterated as-is
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                # The candidate may itself be the list of runs
                runs = candidate

            run_limit = max_runs or len(runs)
            runs = runs[:min(len(runs), run_limit)]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str))
            if joined:
                return joined

def _get_count(self, data, *path_list):
    """Parse a count out of the text found at `path_list`: parse_count() first, then a leading digit group."""
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed

    # Fall back to the leading run of digits/commas once all whitespace is removed
    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of {'url', 'height', 'width'} dicts; entries without a valid URL are dropped
    """
    results = []
    for path in path_list or [()]:
        for entry in traverse_obj(data, (*variadic(path), 'thumbnails', ...)):
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.partition('?')[0]
            results.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results

@staticmethod

def extract_relative_time(relative_time_text):

913

"""

914

Extracts a relative time from string and converts to dt object

915

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'

916

"""

917

918

# XXX: this could be moved to a general function in utils/_utils.py

919

# The relative time text strings are roughly the same as what

920

# Javascript's Intl.RelativeTimeFormat function generates.

921

# See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat

mobj = re.search(

relative_time_text)

if mobj:

start = mobj.group('start')

927

if start:

928

return datetime_from_str(start)

929

try:

930

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _parse_time_text(self, text):
    """
    Convert a time text into a timestamp.

    Relative times are tried first, then unified_timestamp() on the whole text and
    on a date-like substring of it. Returns None (with a warning, unless a
    non-English language is preferred) when nothing can be parsed.
    """
    if not text:
        return

    relative_dt = self.extract_relative_time(text)
    if isinstance(relative_dt, dt.datetime):
        return calendar.timegm(relative_dt.timetuple())

    date_patterns = (
        r'([a-z]+\s*\d{1,2},?\s*20\d{2})',
        r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)',
    )
    timestamp = (
        unified_timestamp(text)
        or unified_timestamp(self._search_regex(date_patterns, text.lower(), 'time text', default=None)))

    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp

953

954

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep`, retrying on network errors, on "unknown error"
    alerts and on responses in which `check_get_keys` cannot be found.

    Returns the response, the result of _error_or_warning() for errors that are
    not retried, or None once the retries for incomplete data are used up.
    """
    raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
    # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
    icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
    icd_rm = next(icd_retries)
    main_retries = iter(self.RetryManager())
    main_rm = next(main_retries)

    def retry_main(error):
        # Record the error on the main manager and advance it
        main_rm.error = error
        next(main_retries)

    # Manual retry loop for multiple RetryManagers
    # The proper RetryManager MUST be advanced after an error
    # and its result MUST be checked if the manager is non fatal
    while True:
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, HTTPError):
                retry_main(e)
                continue

            first_bytes = e.cause.response.read(512)
            if not is_html(first_bytes):
                error_body = self._webpage_read_content(
                    e.cause.response, None, item_id, prefix=first_bytes) or '{}'
                yt_error = try_get(
                    self._parse_json(error_body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.status not in (403, 429):
                retry_main(e)
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube's servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry_main(e)
                continue
            return self._error_or_warning(e, fatal=fatal)

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            icd_rm.error = ExtractorError('Incomplete data received', expected=True)
            should_retry = next(icd_retries, None)
            if not should_retry:
                return None
            continue

        return response

@staticmethod

def is_music_url(url):

1023

return re.match(r'(https?://)?music\.youtube\.com/', url) is not None

1024

1025

def _extract_video(self, renderer):
    """Build a `url`-type result for YoutubeIE from the fields of a video renderer dict."""
    video_id = renderer.get('videoId')

    reel_header_renderer = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header_renderer, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length/overlay text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    byline_channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    channel_id = self.ucid_or_none(byline_channel_id or traverse_obj(
        reel_header_renderer, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId')))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(traverse_obj(renderer, 'badges'))
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header_renderer, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    # Live and upcoming videos report their count under a different field name
    view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
               or self._get_text(reel_header_renderer, 'channelTitleText'))

    channel_handle = traverse_obj(renderer, (
        'shortBylineText', 'runs', ..., 'navigationEndpoint',
        (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
        expected_type=self.handle_from_url, get_all=False)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'uploader': channel,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        # The timestamp is only computed when the "approximate_date" extractor argument is given
        'timestamp': (self._parse_time_text(time_text)
                      if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
                      else None),
        'release_timestamp': scheduled_timestamp,
        'availability':
            'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            else self._availability(
                is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
                needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
                needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
                is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
        view_count_field: view_count,
        'live_status': live_status,
        'channel_is_verified': self._has_badge(owner_badges, BadgeType.VERIFIED) or None,
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

1122

IE_DESC = 'YouTube'

1123

_VALID_URL = r"""(?x)^

1124

(

1125

(?:https?://|//) # http(s):// or protocol-independent URL

1126

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

1127

(?:www\.)?deturl\.com/www\.youtube\.com|

1128

(?:www\.)?pwnyoutube\.com|

1129

(?:www\.)?hooktube\.com|

1130

(?:www\.)?yourepeat\.com|

1131

tube\.majestyc\.net|

1132

%(invidious)s|

1133

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

1134

(?:.*?\#/)? # handle anchor (#/) redirect urls

1135

(?: # the various things that can precede the ID:

1136

1137

|(?: # or the v= param in all its forms

1138

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

1139

(?:\?|\#!?) # the params delimiter ? or # or #!

1140

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

1146

vid\.plus| # or vid.plus/xxxx

1147

zwearz\.com/watch| # or zwearz.com/watch/xxxx

1148

%(invidious)s

1149

)/

1150

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

1151

)

1152

)? # all until now is optional -> you can pass the naked ID

1153

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

1154

(?(1).+)? # if we found the ID, everything can follow

1155

(?:\#|$)""" % {

1156

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

}

# Patterns for discovering YouTube embeds in third-party HTML. Every pattern
# captures the embedded URL in the named group ``url``.
_EMBED_REGEX = [
    # Generic embeds: an <iframe>/<embed>/<object> attribute, a
    # ``data-video-url`` attribute or a SWFObject call, followed by a quoted
    # youtube.com / youtube-nocookie.com ``/embed/``, ``/v/`` or ``/p/`` URL
    # with an 11-character ID. The closing quote must equal the opening one.
    #
    # The ``embedSWF`` branch accepts the real call syntax ``embedSWF("...")``.
    # Its former spelling, ``embedSWF\(?:\s*``, demanded a literal ``:`` and so
    # never matched such calls; it is kept as the second alternative so that
    # nothing which matched before stops matching.
    r'''(?x)
        (?:
            <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF(?:\(\s*|\(?:\s*)|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''',
    # https://wordpress.org/plugins/lazy-load-for-videos/
    # An <a href="...watch?v=ID"> whose class attribute, appearing after the
    # href, contains a word starting with ``lazy-load-youtube``.
    r'''(?xs)
        <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
        \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
]

1177

_RETURN_TYPE = 'video' # XXX: How to handle multifeed?

1178

1179

# Regexes that pull the player identifier (named group ``id``) out of a player
# JavaScript URL. They are listed from the most specific URL layout to the
# loosest one.
_PLAYER_INFO_RE = (
    # .../s/player/<id>/player...
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    # .../<id>/player_ias.vflset[/<lang>_<REGION>]/base.js or
    # .../<id>/player-plasma-ias-(phone|tablet)-<lang>_<REGION>.vflset/base.js
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    # Any .js URL containing a ``vfl...`` token; the token itself is the id.
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)

1184

# Static metadata for known itags, keyed by the itag as a string. Each value
# holds whichever of ext, width, height, codecs, bitrates, container, fps,
# format_note and preference are fixed for that itag; fields that vary between
# videos are simply left out.
_formats = {  # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # Dash mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # Dash webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # Dash webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # Dash webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
    '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
    '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
    '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
    '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}

1293

# Subtitle format identifiers known to this extractor (the code that consumes
# this tuple is outside the visible part of the file).
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# NOTE(review): presumably switches off the base extractor's geo-bypass
# handling for YouTube — confirm against InfoExtractor.
_GEO_BYPASS = False

# Name under which this extractor is registered.
IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1305

'channel': 'Philipp Hagemeister',

1306

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1307

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1308

'upload_date': '20121002',

1309

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1310

'categories': ['Science & Technology'],

1311

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1316

'playable_in_embed': True,

1317

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1318

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1323

'channel_follower_count': int,

1324

'uploader': 'Philipp Hagemeister',

1325

'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',

1326

'uploader_id': '@PhilippHagemeister',

1327

'heatmap': 'count:100',

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1332

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1337

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1338

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1339

'age_limit': 18,

1340

},

1341

'skip': 'Private video',

1342

},

1343

{

1344

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1345

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1350

'channel': 'Philipp Hagemeister',

1351

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1352

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1353

'upload_date': '20121002',

1354

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1355

'categories': ['Science & Technology'],

1356

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1361

'playable_in_embed': True,

1362

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1363

'live_status': 'not_live',

1364

'age_limit': 0,

1365

'comment_count': int,

1366

'channel_follower_count': int,

1367

'uploader': 'Philipp Hagemeister',

1368

'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',

1369

'uploader_id': '@PhilippHagemeister',

1370

'heatmap': 'count:100',

1371

},

1372

'params': {

1373

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1378

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1383

'description': '',

1384

'title': 'UHDTV TEST 8K VIDEO.mp4'

1385

},

1386

'params': {

1387

'youtube_include_dash_manifest': True,

1388

'format': '141',

1389

},

1390

'skip': 'format 141 not served anymore',

1391

},

1392

# DASH manifest with encrypted signature

1393

{

1394

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1399

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1400

'duration': 244,

1401

'upload_date': '20131011',

1402

'abr': 129.495,

1403

'like_count': int,

1404

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1405

'playable_in_embed': True,

1406

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1407

'view_count': int,

1408

'track': 'The Spark',

1409

'live_status': 'not_live',

1410

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1411

'channel': 'Afrojack',

1412

'tags': 'count:19',

1413

'availability': 'public',

1414

'categories': ['Music'],

1415

'age_limit': 0,

1416

'alt_title': 'The Spark',

1417

'channel_follower_count': int,

1418

'uploader': 'Afrojack',

1419

'uploader_url': 'https://www.youtube.com/@Afrojack',

1420

'uploader_id': '@Afrojack',

1421

},

1422

'params': {

1423

'youtube_include_dash_manifest': True,

1424

'format': '141/bestaudio[ext=m4a]',

1425

},

1426

},

1427

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1428

{

1429

'note': 'Embed allowed age-gate video',

1430

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1435

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1436

'duration': 142,

1437

'upload_date': '20140605',

1438

'age_limit': 18,

1439

'categories': ['Gaming'],

1440

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1441

'availability': 'needs_auth',

1442

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1443

'like_count': int,

1444

'channel': 'The Witcher',

1445

'live_status': 'not_live',

1446

'tags': 'count:17',

1447

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1448

'playable_in_embed': True,

1449

'view_count': int,

1450

'channel_follower_count': int,

1451

'uploader': 'The Witcher',

1452

'uploader_url': 'https://www.youtube.com/@thewitcher',

1453

'uploader_id': '@thewitcher',

1454

'comment_count': int,

1455

'channel_is_verified': True,

1456

'heatmap': 'count:100',

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1461

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1466

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1467

'upload_date': '20200408',

1468

'age_limit': 18,

1469

'availability': 'needs_auth',

1470

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1471

'channel': 'FlyingKitty',

1472

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1473

'view_count': int,

1474

'categories': ['Entertainment'],

1475

'live_status': 'not_live',

1476

'tags': ['Flyingkitty', 'godzilla 2'],

1477

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1478

'like_count': int,

1479

'duration': 177,

1480

'playable_in_embed': True,

1481

'channel_follower_count': int,

1482

'uploader': 'FlyingKitty',

1483

'uploader_url': 'https://www.youtube.com/@FlyingKitty900',

1484

'uploader_id': '@FlyingKitty900',

1485

'comment_count': int,

1486

'channel_is_verified': True,

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1491

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1492

'info_dict': {

1493

'id': 'Tq92D6wQ1mg',

1494

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1495

'ext': 'mp4',

1496

'upload_date': '20191228',

1497

'description': 'md5:17eccca93a786d51bc67646756894066',

1498

'age_limit': 18,

1499

'like_count': int,

1500

'availability': 'needs_auth',

1501

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1502

'view_count': int,

1503

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1504

'channel': 'Projekt Melody',

1505

'live_status': 'not_live',

1506

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1507

'playable_in_embed': True,

1508

'categories': ['Entertainment'],

1509

'duration': 106,

1510

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1511

'comment_count': int,

1512

'channel_follower_count': int,

1513

'uploader': 'Projekt Melody',

1514

'uploader_url': 'https://www.youtube.com/@ProjektMelody',

1515

'uploader_id': '@ProjektMelody',

},

},

{

'note': 'Non-Agegated non-embeddable video',

1520

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1525

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1526

'upload_date': '20130730',

1527

'track': 'Such mich find mich',

1528

'age_limit': 0,

1529

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1530

'like_count': int,

1531

'playable_in_embed': False,

1532

'creator': 'OOMPH!',

1533

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1534

'view_count': int,

1535

'alt_title': 'Such mich find mich',

1536

'duration': 210,

1537

'channel': 'Herr Lurik',

1538

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1539

'categories': ['Music'],

1540

'availability': 'public',

1541

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1542

'live_status': 'not_live',

1543

'artist': 'OOMPH!',

1544

'channel_follower_count': int,

1545

'uploader': 'Herr Lurik',

1546

'uploader_url': 'https://www.youtube.com/@HerrLurik',

1547

'uploader_id': '@HerrLurik',

},

},

{

'note': 'Non-bypassable age-gated video',

1552

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1553

'only_matching': True,

1554

},

1555

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1556

# YouTube Red ad is not captured for creator

1557

{

1558

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1564

'creator': 'deadmau5',

1565

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1566

'title': 'Deadmau5 - Some Chords (HD)',

1567

'alt_title': 'Some Chords',

1568

'availability': 'public',

1569

'tags': 'count:14',

1570

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1571

'view_count': int,

1572

'live_status': 'not_live',

1573

'channel': 'deadmau5',

1574

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1575

'like_count': int,

1576

'track': 'Some Chords',

1577

'artist': 'deadmau5',

1578

'playable_in_embed': True,

1579

'age_limit': 0,

1580

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1581

'categories': ['Music'],

1582

'album': 'Some Chords',

1583

'channel_follower_count': int,

1584

'uploader': 'deadmau5',

1585

'uploader_url': 'https://www.youtube.com/@deadmau5',

1586

'uploader_id': '@deadmau5',

1587

},

1588

'expected_warnings': [

1589

'DASH manifest missing',

1590

]

1591

},

1592

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1593

{

1594

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1600

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1601

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1602

'like_count': int,

1603

'release_timestamp': 1343767800,

1604

'playable_in_embed': True,

1605

'categories': ['Sports'],

1606

'release_date': '20120731',

1607

'channel': 'Olympics',

1608

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1609

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1610

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1611

'age_limit': 0,

1612

'availability': 'public',

1613

'live_status': 'was_live',

1614

'view_count': int,

1615

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1616

'channel_follower_count': int,

1617

'uploader': 'Olympics',

1618

'uploader_url': 'https://www.youtube.com/@Olympics',

1619

'uploader_id': '@Olympics',

1620

'channel_is_verified': True,

1621

},

1622

'params': {

1623

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1633

'duration': 85,

1634

'upload_date': '20110310',

1635

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1636

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1637

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1642

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1643

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1644

'view_count': int,

1645

'categories': ['People & Blogs'],

1646

'like_count': int,

1647

'live_status': 'not_live',

1648

'availability': 'unlisted',

1649

'comment_count': int,

1650

'channel_follower_count': int,

1651

'uploader': '孫ᄋᄅ',

1652

'uploader_url': 'https://www.youtube.com/@AllenMeow',

1653

'uploader_id': '@AllenMeow',

1654

},

1655

},

1656

# url_encoded_fmt_stream_map is empty string

1657

{

1658

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1663

'description': '',

1664

'upload_date': '20150404',

1665

},

1666

'params': {

1667

'skip_download': 'requires avconv',

1668

},

1669

'skip': 'This live event has ended.',

1670

},

1671

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1672

{

1673

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1678

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1679

'duration': 220,

1680

'upload_date': '20150625',

1681

'formats': 'mincount:31',

1682

},

1683

'skip': 'not actual anymore',

1684

},

1685

# DASH manifest with segment_list

1686

{

1687

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1688

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1693

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1694

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1695

},

1696

'params': {

1697

'youtube_include_dash_manifest': True,

1698

'format': '135', # bestvideo

1699

},

1700

'skip': 'This live event has ended.',

1701

},

1702

{

1703

# Multifeed videos (multiple cameras), URL can be of any Camera

1704

# TODO: fix multifeed titles

1705

'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',

1706

'info_dict': {

1707

'id': 'zaPI8MvL8pg',

1708

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',

1709

'description': 'md5:563ccbc698b39298481ca3c571169519',

},

'playlist': [{

'info_dict': {

'id': 'j5yGuxZ8lLU',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',

1716

'description': 'md5:563ccbc698b39298481ca3c571169519',

1717

'duration': 10120,

1718

'channel_follower_count': int,

1719

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1720

'availability': 'public',

1721

'playable_in_embed': True,

1722

'upload_date': '20131105',

1723

'categories': ['Gaming'],

1724

'live_status': 'was_live',

1725

'tags': 'count:24',

1726

'release_timestamp': 1383701910,

1727

'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',

1728

'comment_count': int,

1729

'age_limit': 0,

1730

'like_count': int,

1731

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1732

'channel': 'WiiLikeToPlay',

1733

'view_count': int,

1734

'release_date': '20131106',

1735

'uploader': 'WiiLikeToPlay',

1736

'uploader_id': '@WLTP',

1737

'uploader_url': 'https://www.youtube.com/@WLTP',

},

}, {

'info_dict': {

'id': 'zaPI8MvL8pg',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',

1744

'availability': 'public',

1745

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1746

'channel': 'WiiLikeToPlay',

1747

'channel_follower_count': int,

1748

'description': 'md5:563ccbc698b39298481ca3c571169519',

'duration': 10108,

'age_limit': 0,

'like_count': int,

'tags': 'count:24',

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1754

'release_timestamp': 1383701915,

1755

'comment_count': int,

1756

'upload_date': '20131105',

1757

'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',

1758

'release_date': '20131106',

1759

'playable_in_embed': True,

1760

'live_status': 'was_live',

1761

'categories': ['Gaming'],

1762

'view_count': int,

1763

'uploader': 'WiiLikeToPlay',

1764

'uploader_id': '@WLTP',

1765

'uploader_url': 'https://www.youtube.com/@WLTP',

},

}, {

'info_dict': {

'id': 'R7r3vfO7Hao',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',

1772

'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',

1773

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1774

'like_count': int,

1775

'availability': 'public',

1776

'playable_in_embed': True,

1777

'upload_date': '20131105',

1778

'description': 'md5:563ccbc698b39298481ca3c571169519',

1779

'channel_follower_count': int,

1780

'tags': 'count:24',

1781

'release_date': '20131106',

1782

'comment_count': int,

1783

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1784

'channel': 'WiiLikeToPlay',

1785

'categories': ['Gaming'],

1786

'release_timestamp': 1383701914,

1787

'live_status': 'was_live',

'age_limit': 0,

'duration': 10128,

'view_count': int,

'uploader': 'WiiLikeToPlay',

1792

'uploader_id': '@WLTP',

1793

'uploader_url': 'https://www.youtube.com/@WLTP',

1794

},

1795

}],

1796

'params': {'skip_download': True},

1797

},

1798

{

1799

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1800

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1801

'info_dict': {

1802

'id': 'gVfLd0zydlo',

1803

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1804

},

1805

'playlist_count': 2,

1806

'skip': 'Not multifeed anymore',

1807

},

1808

{

1809

'url': 'https://vid.plus/FlRa-iH7PGw',

1810

'only_matching': True,

1811

},

1812

{

1813

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1814

'only_matching': True,

1815

},

1816

{

1817

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1818

# Also tests cut-off URL expansion in video description (see

1819

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1820

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1821

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1826

'alt_title': 'Dark Walk',

1827

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1828

'duration': 133,

1829

'upload_date': '20151119',

1830

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1831

'track': 'Dark Walk',

1832

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1833

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1834

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1835

'categories': ['Film & Animation'],

1836

'view_count': int,

1837

'live_status': 'not_live',

1838

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1839

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1840

'tags': 'count:13',

1841

'availability': 'public',

1842

'channel': 'IronSoulElf',

1843

'playable_in_embed': True,

1844

'like_count': int,

1845

'age_limit': 0,

1846

'channel_follower_count': int

1847

},

1848

'params': {

1849

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1854

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1855

'only_matching': True,

1856

},

1857

{

1858

# Video with yt:stretch=17:0

1859

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1864

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1865

'upload_date': '20151107',

1866

},

1867

'params': {

1868

'skip_download': True,

1869

},

1870

'skip': 'This video does not exist.',

1871

},

1872

{

1873

# Video with incomplete 'yt:stretch=16:'

1874

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1875

'only_matching': True,

1876

},

1877

{

1878

# Video licensed under Creative Commons

1879

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1884

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1885

'duration': 721,

1886

'upload_date': '20150128',

1887

'license': 'Creative Commons Attribution license (reuse allowed)',

1888

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1889

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1890

'like_count': int,

1891

'age_limit': 0,

1892

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1893

'channel': 'The Berkman Klein Center for Internet & Society',

1894

'availability': 'public',

1895

'view_count': int,

1896

'categories': ['Education'],

1897

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1898

'live_status': 'not_live',

1899

'playable_in_embed': True,

1900

'channel_follower_count': int,

1901

'chapters': list,

1902

'uploader': 'The Berkman Klein Center for Internet & Society',

1903

'uploader_id': '@BKCHarvard',

1904

'uploader_url': 'https://www.youtube.com/@BKCHarvard',

1905

},

1906

'params': {

1907

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1916

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1917

'duration': 4060,

1918

'upload_date': '20151120',

1919

'license': 'Creative Commons Attribution license (reuse allowed)',

1920

'playable_in_embed': True,

1921

'tags': 'count:12',

1922

'like_count': int,

1923

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1924

'age_limit': 0,

1925

'availability': 'public',

1926

'categories': ['News & Politics'],

1927

'channel': 'Bernie Sanders',

1928

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1929

'view_count': int,

1930

'live_status': 'not_live',

1931

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1932

'comment_count': int,

1933

'channel_follower_count': int,

1934

'chapters': list,

1935

'uploader': 'Bernie Sanders',

1936

'uploader_url': 'https://www.youtube.com/@BernieSanders',

1937

'uploader_id': '@BernieSanders',

1938

'channel_is_verified': True,

1939

'heatmap': 'count:100',

1940

},

1941

'params': {

1942

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1947

'only_matching': True,

1948

},

1949

{

1950

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1951

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1952

'only_matching': True,

1953

},

1954

{

1955

# Rental video preview

1956

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1961

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1962

'upload_date': '20150811',

1963

'license': 'Standard YouTube License',

1964

},

1965

'params': {

1966

'skip_download': True,

1967

},

1968

'skip': 'This video is not available.',

1969

},

1970

{

1971

# YouTube Red video with episode data

1972

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1977

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1978

'duration': 2085,

1979

'upload_date': '20170118',

1980

'series': 'Mind Field',

1981

'season_number': 1,

1982

'episode_number': 1,

1983

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1984

'tags': 'count:12',

1985

'view_count': int,

1986

'availability': 'public',

1987

'age_limit': 0,

1988

'channel': 'Vsauce',

1989

'episode': 'Episode 1',

1990

'categories': ['Entertainment'],

1991

'season': 'Season 1',

1992

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1993

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1994

'like_count': int,

1995

'playable_in_embed': True,

1996

'live_status': 'not_live',

1997

'channel_follower_count': int,

1998

'uploader': 'Vsauce',

1999

'uploader_url': 'https://www.youtube.com/@Vsauce',

2000

'uploader_id': '@Vsauce',

2001

'comment_count': int,

2002

'channel_is_verified': True,

2003

},

2004

'params': {

2005

'skip_download': True,

2006

},

2007

'expected_warnings': [

2008

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

2013

# as inappropriate or offensive to some audiences.

2014

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

2019

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

2020

'duration': 965,

2021

'upload_date': '20140124',

2022

},

2023

'params': {

2024

'skip_download': True,

2025

},

2026

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

2031

'only_matching': True,

2032

},

2033

{

2034

# geo restricted to JP

2035

'url': 'sJL6WA-aGkQ',

2036

'only_matching': True,

2037

},

2038

{

2039

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

2040

'only_matching': True,

2041

},

2042

{

2043

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

2044

'only_matching': True,

2045

},

2046

{

2047

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

2048

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

2049

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

2054

'only_matching': True,

2055

},

2056

{

2057

# Video with unsupported adaptive stream type formats

2058

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

2063

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

2064

'duration': 433,

2065

'upload_date': '20130923',

2066

'formats': 'maxcount:10',

2067

},

2068

'params': {

2069

'skip_download': True,

2070

'youtube_include_dash_manifest': False,

2071

},

2072

'skip': 'not actual anymore',

2073

},

2074

{

2075

# YouTube Music auto-generated description

2076

# TODO: fix metadata extraction

2077

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

2082

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

2083

'upload_date': '20190312',

2084

'artists': ['Stephen'],

2085

'creators': ['Stephen'],

2086

'track': 'Voyeur Girl',

2087

'album': 'it\'s too much love to know my dear',

2088

'release_date': '20190313',

2089

'alt_title': 'Voyeur Girl',

2090

'view_count': int,

2091

'playable_in_embed': True,

2092

'like_count': int,

2093

'categories': ['Music'],

2094

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

2095

'channel': 'Stephen', # TODO: should be "Stephen - Topic"

2096

'uploader': 'Stephen',

2097

'availability': 'public',

2098

'duration': 169,

2099

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

2100

'age_limit': 0,

2101

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

2102

'tags': 'count:11',

2103

'live_status': 'not_live',

2104

'channel_follower_count': int

2105

},

2106

'params': {

2107

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

2112

'only_matching': True,

2113

},

2114

{

2115

# invalid -> valid video id redirection

2116

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

2121

'description': 'md5:bf577a41da97918e94fa9798d9228825',

2122

'upload_date': '20090125',

2123

'artist': 'Panjabi MC',

2124

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

2125

'album': 'Beware of the Boys (Mundian To Bach Ke)',

2126

},

2127

'params': {

2128

'skip_download': True,

2129

},

2130

'skip': 'Video unavailable',

2131

},

2132

{

2133

# empty description results in an empty string

2134

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

2141

'view_count': int,

2142

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

2143

'like_count': int,

2144

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

2145

'tags': [],

2146

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

2147

'availability': 'public',

2148

'age_limit': 0,

2149

'categories': ['Pets & Animals'],

2150

'duration': 7,

2151

'playable_in_embed': True,

2152

'live_status': 'not_live',

2153

'channel': 'l\'Or Vert asbl',

2154

'channel_follower_count': int,

2155

'uploader': 'l\'Or Vert asbl',

2156

'uploader_url': 'https://www.youtube.com/@ElevageOrVert',

2157

'uploader_id': '@ElevageOrVert',

2158

},

2159

'params': {

2160

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

2165

# see [2] for an example with '};' inside ytInitialPlayerResponse

2166

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

2167

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

2168

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

2173

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

2174

'upload_date': '20130831',

2175

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

2176

'like_count': int,

2177

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

2178

'live_status': 'not_live',

2179

'categories': ['Education'],

2180

'availability': 'public',

2181

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

2182

'tags': 'count:12',

2183

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

2188

'comment_count': int,

2189

'channel_follower_count': int,

2190

'chapters': list,

2191

'uploader': 'kudvenkat',

2192

'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',

2193

'uploader_id': '@Csharp-video-tutorialsBlogspot',

2194

'channel_is_verified': True,

2195

'heatmap': 'count:100',

2196

},

2197

'params': {

2198

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

2203

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

2204

'only_matching': True,

2205

},

2206

{

2207

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

2208

'only_matching': True,

2209

},

2210

{

2211

# https://github.com/ytdl-org/youtube-dl/pull/28094

2212

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

2218

'upload_date': '20141120',

2219

'artist': 'The Cinematic Orchestra',

2220

'track': 'Burn Out',

2221

'album': 'Every Day',

2222

'like_count': int,

2223

'live_status': 'not_live',

2224

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2229

'creator': 'The Cinematic Orchestra',

2230

'channel': 'The Cinematic Orchestra',

2231

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

2232

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2233

'availability': 'public',

2234

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

2235

'categories': ['Music'],

2236

'playable_in_embed': True,

2237

'channel_follower_count': int,

2238

'uploader': 'The Cinematic Orchestra',

2239

'comment_count': int,

2240

},

2241

'params': {

2242

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

2247

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

2248

'only_matching': True,

2249

},

2250

{

2251

# controversial video, requires bpctr/contentCheckOk

2252

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

2257

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

2258

'upload_date': '20140716',

2259

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

2260

'duration': 170,

2261

'categories': ['News & Politics'],

2262

'view_count': int,

2263

'channel': 'CBS Mornings',

2264

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2265

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2266

'age_limit': 18,

2267

'availability': 'needs_auth',

2268

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2269

'like_count': int,

2270

'live_status': 'not_live',

2271

'playable_in_embed': True,

2272

'channel_follower_count': int,

2273

'uploader': 'CBS Mornings',

2274

'uploader_url': 'https://www.youtube.com/@CBSMornings',

2275

'uploader_id': '@CBSMornings',

2276

'comment_count': int,

2277

'channel_is_verified': True,

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2282

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2287

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2288

'upload_date': '20201120',

2289

'duration': 1456,

2290

'categories': ['Travel & Events'],

2291

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2292

'view_count': int,

2293

'channel': 'Walk around Japan',

2294

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2295

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2296

'age_limit': 0,

2297

'availability': 'public',

2298

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2299

'live_status': 'not_live',

2300

'playable_in_embed': True,

2301

'channel_follower_count': int,

2302

'uploader': 'Walk around Japan',

2303

'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',

2304

'uploader_id': '@walkaroundjapan7124',

2305

},

2306

'params': {

2307

'skip_download': True,

2308

},

2309

}, {

2310

# Has multiple audio streams

2311

'url': 'WaOKSUlf4TM',

2312

'only_matching': True

2313

}, {

2314

# Requires Premium: has format 141 when requested using YTM url

2315

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2316

'only_matching': True

2317

}, {

2318

# multiple subtitles with same lang_code

2319

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2320

'only_matching': True,

2321

}, {

2322

# Force use android client fallback

2323

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2324

'info_dict': {

2325

'id': 'YOelRv7fMxY',

2326

'title': 'DIGGING A SECRET TUNNEL Part 1',

2327

'ext': '3gp',

2328

'upload_date': '20210624',

2329

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2330

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2331

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2332

'duration': 596,

2333

'categories': ['Entertainment'],

2334

'view_count': int,

2335

'channel': 'colinfurze',

2336

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2337

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2338

'age_limit': 0,

2339

'availability': 'public',

2340

'like_count': int,

2341

'live_status': 'not_live',

2342

'playable_in_embed': True,

2343

'channel_follower_count': int,

2344

'chapters': list,

2345

'uploader': 'colinfurze',

2346

'uploader_url': 'https://www.youtube.com/@colinfurze',

2347

'uploader_id': '@colinfurze',

2348

'comment_count': int,

2349

'channel_is_verified': True,

2350

'heatmap': 'count:100',

2351

},

2352

'params': {

2353

'format': '17', # 3gp format available on android

2354

'extractor_args': {'youtube': {'player_client': ['android']}},

2355

},

2356

'skip': 'android client broken',

2357

},

2358

{

2359

# Skip download of additional client configs (remix client config in this case)

2360

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2361

'only_matching': True,

2362

'params': {

2363

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2368

'only_matching': True,

2369

}, {

2370

'note': 'Storyboards',

2371

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2377

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2378

'upload_date': '20140324',

2379

'like_count': int,

2380

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2381

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2382

'view_count': int,

2383

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2384

'playable_in_embed': True,

2385

'tags': 'count:12',

2386

'availability': 'public',

2387

'channel': 'SciShow',

2388

'live_status': 'not_live',

2389

'duration': 248,

2390

'categories': ['Education'],

2391

'age_limit': 0,

2392

'channel_follower_count': int,

2393

'chapters': list,

2394

'uploader': 'SciShow',

2395

'uploader_url': 'https://www.youtube.com/@SciShow',

2396

'uploader_id': '@SciShow',

2397

'comment_count': int,

2398

'channel_is_verified': True,

2399

'heatmap': 'count:100',

2400

}, 'params': {'format': 'mhtml', 'skip_download': True}

2401

}, {

2402

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2403

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2408

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2409

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2410

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2415

'tags': 'count:23',

2416

'playable_in_embed': True,

2417

'live_status': 'not_live',

2418

'upload_date': '20220103',

2419

'like_count': int,

2420

'availability': 'public',

2421

'channel': 'Leon Nguyen',

2422

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2423

'comment_count': int,

2424

'channel_follower_count': int,

2425

'uploader': 'Leon Nguyen',

2426

'uploader_url': 'https://www.youtube.com/@LeonNguyen',

2427

'uploader_id': '@LeonNguyen',

2428

'heatmap': 'count:100',

2429

}

2430

}, {

2431

# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date

2432

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2437

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2438

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2439

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2444

'tags': 'count:23',

2445

'playable_in_embed': True,

2446

'live_status': 'not_live',

2447

'upload_date': '20220102',

2448

'like_count': int,

2449

'availability': 'public',

2450

'channel': 'Leon Nguyen',

2451

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2452

'comment_count': int,

2453

'channel_follower_count': int,

2454

'uploader': 'Leon Nguyen',

2455

'uploader_url': 'https://www.youtube.com/@LeonNguyen',

2456

'uploader_id': '@LeonNguyen',

2457

'heatmap': 'count:100',

2458

},

2459

'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}

2460

}, {

2461

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2462

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2467

'description': 'md5:978296ec9783a031738b684d4ebf302d',

2468

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2469

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2474

'tags': 'count:26',

2475

'playable_in_embed': True,

2476

'live_status': 'not_live',

2477

'release_timestamp': 1641172509,

2478

'release_date': '20220103',

2479

'upload_date': '20220103',

2480

'like_count': int,

2481

'availability': 'public',

2482

'channel': 'Quackity',

2483

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2484

'channel_follower_count': int,

2485

'uploader': 'Quackity',

2486

'uploader_id': '@Quackity',

2487

'uploader_url': 'https://www.youtube.com/@Quackity',

2488

'comment_count': int,

2489

'channel_is_verified': True,

2490

'heatmap': 'count:100',

2491

}

2492

},

2493

{ # continuous livestream. Microformat upload date should be preferred.

2494

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2495

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2496

'info_dict': {

2497

'id': 'kgx4WGK0oNU',

2498

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2499

'ext': 'mp4',

2500

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2501

'availability': 'public',

2502

'age_limit': 0,

2503

'release_timestamp': 1637975704,

2504

'upload_date': '20210619',

2505

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2506

'live_status': 'is_live',

2507

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2508

'channel': 'Abao in Tokyo',

2509

'channel_follower_count': int,

2510

'release_date': '20211127',

2511

'tags': 'count:39',

2512

'categories': ['People & Blogs'],

2513

'like_count': int,

2514

'view_count': int,

2515

'playable_in_embed': True,

2516

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2517

'concurrent_view_count': int,

2518

'uploader': 'Abao in Tokyo',

2519

'uploader_url': 'https://www.youtube.com/@abaointokyo',

2520

'uploader_id': '@abaointokyo',

2521

},

2522

'params': {'skip_download': True}

2523

}, {

2524

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2529

'upload_date': '20220323',

2530

'like_count': int,

2531

'availability': 'unlisted',

2532

'channel': 'Lesmiscore',

2533

'thumbnail': r're:^https?://.*\.jpg',

2534

'age_limit': 0,

2535

'categories': ['Music'],

2536

'view_count': int,

2537

'description': '',

2538

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2539

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2540

'live_status': 'not_live',

2541

'playable_in_embed': True,

2542

'channel_follower_count': int,

2543

'duration': 6,

2544

'tags': [],

2545

'uploader_id': '@lesmiscore',

2546

'uploader': 'Lesmiscore',

2547

'uploader_url': 'https://www.youtube.com/@lesmiscore',

2548

}

2549

}, {

2550

# Prefer primary title+description language metadata by default

2551

# Do not prefer translated description if primary is empty

2552

'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',

'info_dict': {

'id': 'el3E4MbxRqQ',

'ext': 'mp4',

'title': 'dlp test video 2 - primary sv no desc',

2557

'description': '',

2558

'channel': 'cole-dlp-test-acc',

2559

'tags': [],

2560

'view_count': int,

2561

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2562

'like_count': int,

2563

'playable_in_embed': True,

2564

'availability': 'unlisted',

2565

'thumbnail': r're:^https?://.*\.jpg',

2566

'age_limit': 0,

2567

'duration': 5,

2568

'live_status': 'not_live',

2569

'upload_date': '20220908',

2570

'categories': ['People & Blogs'],

2571

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2572

'uploader_url': 'https://www.youtube.com/@coletdjnz',

2573

'uploader_id': '@coletdjnz',

2574

'uploader': 'cole-dlp-test-acc',

2575

},

2576

'params': {'skip_download': True}

2577

}, {

2578

# Extractor argument: prefer translated title+description

2579

'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',

'info_dict': {

'id': 'gHKT4uU8Zng',

'ext': 'mp4',

'channel': 'cole-dlp-test-acc',

2584

'tags': [],

2585

'duration': 5,

2586

'live_status': 'not_live',

2587

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2588

'upload_date': '20220728',

2589

'view_count': int,

2590

'categories': ['People & Blogs'],

2591

'thumbnail': r're:^https?://.*\.jpg',

2592

'title': 'dlp test video title translated (fr)',

2593

'availability': 'public',

2594

'age_limit': 0,

2595

'description': 'dlp test video description translated (fr)',

2596

'playable_in_embed': True,

2597

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2598

'uploader_url': 'https://www.youtube.com/@coletdjnz',

2599

'uploader_id': '@coletdjnz',

2600

'uploader': 'cole-dlp-test-acc',

2601

},

2602

'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},

2603

'expected_warnings': [r'Preferring "fr" translated fields'],

2604

}, {

2605

'note': '6 channel audio',

2606

'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',

2607

'only_matching': True,

2608

}, {

2609

'note': 'Multiple HLS formats with same itag',

2610

'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',

'info_dict': {

'id': 'kX3nB4PpJko',

'ext': 'mp4',

'categories': ['Entertainment'],

2615

'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',

2616

'live_status': 'not_live',

2617

'duration': 937,

2618

'channel_follower_count': int,

2619

'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',

2620

'title': 'Last To Take Hand Off Jet, Keeps It!',

2621

'channel': 'MrBeast',

2622

'playable_in_embed': True,

2623

'view_count': int,

2624

'upload_date': '20221112',

2625

'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',

2626

'age_limit': 0,

2627

'availability': 'public',

2628

'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',

2629

'like_count': int,

2630

'tags': [],

2631

'uploader': 'MrBeast',

2632

'uploader_url': 'https://www.youtube.com/@MrBeast',

2633

'uploader_id': '@MrBeast',

2634

'comment_count': int,

2635

'channel_is_verified': True,

2636

'heatmap': 'count:100',

2637

},

2638

'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},

2639

}, {

2640

'note': 'Audio formats with Dynamic Range Compression',

2641

'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',

'info_dict': {

'id': 'Tq92D6wQ1mg',

'ext': 'webm',

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

2646

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

2647

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

2648

'channel_follower_count': int,

2649

'description': 'md5:17eccca93a786d51bc67646756894066',

2650

'upload_date': '20191228',

2651

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

2652

'playable_in_embed': True,

2653

'like_count': int,

2654

'categories': ['Entertainment'],

2655

'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',

2656

'age_limit': 18,

2657

'channel': 'Projekt Melody',

2658

'view_count': int,

2659

'availability': 'needs_auth',

2660

'comment_count': int,

2661

'live_status': 'not_live',

2662

'duration': 106,

2663

'uploader': 'Projekt Melody',

2664

'uploader_id': '@ProjektMelody',

2665

'uploader_url': 'https://www.youtube.com/@ProjektMelody',

2666

},

2667

'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},

2668

},

2669

{

2670

'url': 'https://www.youtube.com/live/qVv6vCqciTM',

'info_dict': {

'id': 'qVv6vCqciTM',

'ext': 'mp4',

'age_limit': 0,

'comment_count': int,

2676

'chapters': 'count:13',

2677

'upload_date': '20221223',

2678

'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',

2679

'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',

2680

'like_count': int,

2681

'release_date': '20221223',

2682

'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],

2683

'title': '【 #インターネット女クリスマス】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',

2684

'view_count': int,

2685

'playable_in_embed': True,

2686

'duration': 4438,

2687

'availability': 'public',

2688

'channel_follower_count': int,

2689

'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',

2690

'categories': ['Entertainment'],

2691

'live_status': 'was_live',

2692

'release_timestamp': 1671793345,

2693

'channel': 'さなちゃんねる',

2694

'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',

2695

'uploader': 'さなちゃんねる',

2696

'uploader_url': 'https://www.youtube.com/@sana_natori',

2697

'uploader_id': '@sana_natori',

2698

'channel_is_verified': True,

2699

'heatmap': 'count:100',

},

},

{

# Fallbacks when webpage and web client is unavailable

2704

'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',

'info_dict': {

'id': 'wSSmNUl9Snw',

'ext': 'mp4',

# 'categories': ['Science & Technology'],

2709

'view_count': int,

2710

'chapters': 'count:2',

2711

'channel': 'Scott Manley',

2712

'like_count': int,

2713

'age_limit': 0,

2714

# 'availability': 'public',

2715

'channel_follower_count': int,

2716

'live_status': 'not_live',

2717

'upload_date': '20170831',

2718

'duration': 682,

2719

'tags': 'count:8',

2720

'uploader_url': 'https://www.youtube.com/@scottmanley',

2721

'description': 'md5:f4bed7b200404b72a394c2f97b782c02',

2722

'uploader': 'Scott Manley',

2723

'uploader_id': '@scottmanley',

2724

'title': 'The Computer Hack That Saved Apollo 14',

2725

'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',

2726

'thumbnail': r're:^https?://.*\.webp',

2727

'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',

2728

'playable_in_embed': True,

2729

'comment_count': int,

2730

'channel_is_verified': True,

2731

'heatmap': 'count:100',

2732

},

2733

'params': {

2734

'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},

},

},

]

_WEBPAGE_TESTS = [

# YouTube <object> embed

2741

{

2742

'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',

2743

'md5': '873c81d308b979f0e23ee7e620b312a3',

'info_dict': {

'id': 'msN87y-iEx0',

'ext': 'mp4',

'title': 'Feynman: Mirrors FUN TO IMAGINE 6',

2748

'upload_date': '20080526',

2749

'description': 'md5:873c81d308b979f0e23ee7e620b312a3',

2750

'age_limit': 0,

2751

'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],

2752

'channel_id': 'UCCeo--lls1vna5YJABWAcVA',

2753

'playable_in_embed': True,

2754

'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',

2755

'like_count': int,

2756

'comment_count': int,

2757

'channel': 'Christopher Sykes',

2758

'live_status': 'not_live',

2759

'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',

2760

'availability': 'public',

2761

'duration': 195,

2762

'view_count': int,

2763

'categories': ['Science & Technology'],

2764

'channel_follower_count': int,

2765

'uploader': 'Christopher Sykes',

2766

'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',

2767

'uploader_id': '@ChristopherSykesDocumentaries',

2768

'heatmap': 'count:100',

2769

},

2770

'params': {

2771

'skip_download': True,

}

},

]

@classmethod
def suitable(cls, url):
    """Reject URLs carrying a non-empty ``list`` query value; otherwise defer to the parent class."""
    # NOTE(review): parse_qs is imported locally, presumably so this method stays
    # self-contained if its source is copied elsewhere — confirm before hoisting.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)

2784

2785

def __init__(self, *args, **kwargs):
    """Initialise the parent class, then create the two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS (filled by _load_player)
    # _player_cache: cache-id tuple -> result or stored exception (filled by _cached)
    self._code_cache, self._player_cache = {}, {}

2789

2790

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """
    Attach fragment generators to every format flagged ``is_from_start``.

    While the stream is live the format gets a lazy generator factory and the
    ``http_dash_segments_generator`` protocol; otherwise the fragments are
    wrapped in a LazyList and the ``is_from_start`` flag is removed.
    The format dicts are modified in place.
    """
    refetch_lock = threading.Lock()
    last_fetch = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses once more than `delay` seconds have
        # passed since the last fetch, replacing the shared formats / is_live.
        nonlocal formats, last_fetch, is_live
        if time.time() <= last_fetch + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'), expected_type=dict)
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        last_fetch = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            with refetch_lock:
                refetch_manifest(format_id, delay)

            matched = next((fmt for fmt in formats if fmt['format_id'] == format_id), None)
            if matched:
                return matched['manifest_url'], matched['manifest_stream_number'], is_live
            retry.error = (
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
                if is_live else f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt['is_live'] = is_live
        # A copy of the format is handed over only when the stream is no longer live
        manifestless_fmt = not is_live and fmt.copy()
        gen = functools.partial(
            self._live_dash_fragments, video_id, fmt['format_id'],
            live_start_time, mpd_feed, manifestless_fmt)
        if not is_live:
            fmt['fragments'] = LazyList(gen({}))
            del fmt['is_from_start']
        else:
            fmt['fragments'] = gen
            fmt['protocol'] = 'http_dash_segments_generator'

2837

2838

def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """
    Generate fragment dicts (``url``, ``fragment_count``) for a live DASH format.

    @param mpd_feed              callable(format_id, delay) returning
                                 (manifest_url, manifest_stream_number, is_live) or None
    @param manifestless_orig_fmt format dict used in place of a downloaded MPD when truthy;
                                 the generator then stops after the first pass
    @param ctx                   dict; ``start`` and ``last_error`` are read from it
    """
    # FETCH_SPAN: minimum seconds between polling passes
    # MAX_DURATION: 432000 seconds = 120 hours, the window named in the warning below
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Bound up front: _extract_sequence_from_mpd reads this name on its failure
    # paths, which can be reached before the loop below has assigned it.
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # Refetch quickly when asked to, or when the last download error was an HTTP 403
        expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 2
                return False, last_seq
            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        # Give up once too many passes in a row produced no new fragment
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Caught just below; used only to restart the outer while loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2948

2949

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfg dicts, or None."""
    direct_path = (..., 'PLAYER_JS_URL')
    context_config_path = (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl')
    player_url = traverse_obj(
        ytcfgs, direct_path, context_config_path, get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', player_url) if player_url else None

2956

2957

def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the player version found in the iframe API script."""
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2966

2967

def _signature_cache_id(self, example_sig):

2968

""" Return a string representation of a signature """

2969

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2970

2971

@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the ``id`` group of the first pattern in ``_PLAYER_INFO_RE`` that matches.

    @raises ExtractorError if no pattern matches
    """
    # Lazy, so patterns after the first match are never tried
    candidates = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next(filter(None, candidates), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')

2980

2981

def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS for player_url, downloading it once per player id."""
    player_id = self._extract_player_info(player_url)
    if player_id not in self._code_cache:
        js_code = self._download_webpage(
            player_url, video_id, fatal=fatal,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
        # A failed, non-fatal download is not cached, so a later call retries
        if js_code:
            self._code_cache[player_id] = js_code
    return self._code_cache.get(player_id)

2991

2992

def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a function that rearranges a signature of the same shape as example_sig.

    The rearrangement is kept as a list of source indices, loaded from the
    'youtube-sigfuncs' cache when present and otherwise derived from the player JS.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not cache_spec:
        code = self._load_player(video_id, player_url)
        if code:
            sig_func = self._parse_sig_js(code)
            # Feed a string whose i-th character has code point i; the code points
            # of the output then say which input position each character came from
            probe = ''.join(map(chr, range(len(example_sig))))
            cache_spec = list(map(ord, sig_func(probe)))
            self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    def apply_spec(s):
        return ''.join(s[pos] for pos in cache_spec)

    return apply_spec

3011

3012

def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to func when the 'youtube_print_sig_code' param is set."""
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        # Yields slice/index expressions on `s` that together reproduce idxs
        def _genslice(start, end, step):
            stop = end + step
            head = str(start) if start != 0 else ''
            tail = (':%d' % stop) if stop >= 0 else ':'
            stride = (':%d' % step) if step != 1 else ''
            return f's[{head}{tail}{stride}]'

        step = None
        # Placeholder only; overwritten whenever a run (step) begins
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            delta = i - prev
            if step is not None:
                if delta != step:
                    # The current run ended at prev
                    yield _genslice(start, prev, step)
                    step = None
                continue
            if delta in [-1, 1]:
                step, start = delta, prev
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    probe = ''.join(map(chr, range(len(example_sig))))
    cache_spec = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    part_lengths = ', '.join(str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

3053

3054

def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and return a Python callable for it.

    The patterns are tried in order; the first match supplies the function name
    (named group ``sig``), which is then extracted with JSInterpreter.
    """
    # Literal parentheses must be written as \( and \) in these patterns.
    # An unescaped "$" would anchor at end-of-string and keep the pattern from matching.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])

3074

3075

def _cached(self, func, *cache_id):

3076

def inner(*args, **kwargs):

3077

if cache_id not in self._player_cache:

3078

try:

3079

self._player_cache[cache_id] = func(*args, **kwargs)

3080

except ExtractorError as e:

3081

self._player_cache[cache_id] = e

3082

except Exception as e:

3083

self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)

3084

3085

ret = self._player_cache[cache_id]

3086

if isinstance(ret, Exception):

raise ret

return ret

return inner

def _decrypt_signature(self, s, video_id, player_url):
    """Return the working signature for the encrypted s field"""
    cache_key = ('sig', player_url, self._signature_cache_id(s))
    extract_sig = self._cached(self._extract_signature_function, *cache_key)
    func = extract_sig(video_id, player_url, s)
    self._print_sig_code(func, s)
    return func(s)

3098

3099

def _decrypt_nsig(self, s, video_id, player_url):
    """
    Return the decrypted value of the encrypted n field.

    The native JS interpreter is tried first; if it raises
    JSInterpreter.Exception, the function body is run through PhantomJS instead.

    @raises ExtractorError if player_url is None or the nsig function code cannot be extracted
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as err:
        raise ExtractorError('Unable to extract nsig function code', cause=err)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        build_nsig_func = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        nsig_func = build_nsig_func(jsi, func_code)
        ret = nsig_func(s)
    except JSInterpreter.Exception as native_err:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # No PhantomJS fallback available: surface the native failure
            raise native_err
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_err, only_once=True)

        args, func_body = func_code
        script = f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));'
        ret = jsi.execute(script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret

3132

3133

def _extract_n_function_name(self, jscode):
    """
    Return the name of the n-parameter function referenced in the player JS.

    The player references the function either directly by name or as an entry
    of an array (``name[idx]``); in the array case the array literal is looked
    up and the entry at ``idx`` is returned.
    """
    # Literal parentheses are escaped as \( and \); an unescaped "$" here would
    # anchor at end-of-string and keep the pattern from matching.
    funcname, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return funcname

    # funcname names an array: parse its literal and pick the referenced entry
    return json.loads(js_to_json(self._search_regex(
        rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
        f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]

3143

3144

def _extract_n_function_code(self, video_id, player_url):
    """
    Return ``(jsi, player_id, func_code)`` for the player's n-parameter function.

    func_code is loaded from the 'youtube-nsig' cache when available. Otherwise
    it is extracted from the player JS, by regex first and by JSInterpreter as
    the fallback, and then stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because JS identifiers may contain "$", and literal
    # parentheses are written as \( and \).
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
                 # NB: The end of the regex is intentionally kept strict
                 {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Same (argument-names, body) shape as the interpreter fallback's result
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code

3169

3170

def _extract_n_function_from_code(self, jsi, func_code):
    """Build a callable that runs the extracted n function and validates its result."""
    func = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = func([s])
        except JSInterpreter.Exception:
            raise
        except Exception as err:
            # Report any other failure as an interpreter error, with the traceback as message
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)

        if not result.startswith('enhanced_except_'):
            return result
        raise JSInterpreter.Exception('Signature function returned an exception')

    return extract_nsig

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ytcfg['STS'] is used when it yields a truthy integer; otherwise the value
    is searched for in the player JS.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))

3210

3211

def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs from the player responses, in that order."""
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(
            player_responses, ('playbackTracking', key, 'baseUrl'), expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        length_values = qs.get('len') or ['1.5']
        video_length = [str(float(length_values[0]) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = 0
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

3251

3252

@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url_result entries for the YouTube videos referenced by a third-party page."""
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for id_ in lazy_ids:
        yield cls.url_result(unescapeHTML(id_), cls, id_)

    # Wordpress "YouTube Video Importer" plugin
    # findall returns one tuple per match; the video id is its last group
    for *_, embed_id in re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        yield cls.url_result(embed_id, cls, embed_id)

3275

3276

@classmethod
def extract_id(cls, url):
    """
    Return the id that ``get_temp_id`` derives from url.

    @raises ExtractorError if no id can be derived
    """
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

3282

3283

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in data."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters')

    def start_of(chapter):
        # timeRangeStartMillis is scaled down by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapter_list = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters_helper(
        chapter_list, start_function=start_of, title_function=title_of, duration=duration)

3297

3298

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the first non-empty chapter list built from the engagement panels, else []."""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters_helper(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

3310

3311

def _extract_heatmap(self, data):
    """Return start_time/end_time/value dicts for the heatmap markers in data, or None if there are none."""
    def is_heatmap_mutation(_, mutation):
        markers_list = mutation['payload']['macroMarkersListEntity']['markersList']
        return markers_list['markerType'] == 'MARKER_TYPE_HEATMAP'

    def end_seconds(marker):
        return (int(marker['startMillis']) + int(marker['durationMillis'])) / 1000

    marker_fields = {
        'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
        'end_time': {end_seconds},
        'value': ('intensityScoreNormalized', {float_or_none}),
    }
    heatmap = traverse_obj(data, (
        'frameworkUpdates', 'entityBatchUpdate', 'mutations', is_heatmap_mutation,
        'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., marker_fields))
    return heatmap or None

3320

3321

def _extract_comment(self, entities, parent=None):
    """Build a comment dict from entity payloads; returns None when no comment id is found."""
    comment_entity_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
    comment_id = traverse_obj(comment_entity_payload, ('properties', 'commentId', {str}))
    if not comment_id:
        return

    toolbar_entity_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
    time_text = traverse_obj(comment_entity_payload, ('properties', 'publishedTime', {str})) or ''

    fields = traverse_obj(comment_entity_payload, {
        'text': ('properties', 'content', 'content', {str}),
        'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
        'author_id': ('author', 'channelId', {self.ucid_or_none}),
        'author': ('author', 'displayName', {str}),
        'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
        'author_is_uploader': ('author', 'isCreator', {bool}),
        'author_is_verified': ('author', 'isVerified', {bool}),
        'author_url': ('author', 'channelCommand', 'innertubeCommand', (
            ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url')
        ), {lambda x: urljoin('https://www.youtube.com', x)}),
    }, get_all=False)

    # Unknown (None) when there is no toolbar state payload at all
    if toolbar_entity_payload is None:
        is_favorited = None
    else:
        is_favorited = toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'

    return {
        'id': comment_id,
        'parent': parent or 'root',
        **fields,
        'is_favorited': is_favorited,
        '_time_text': time_text,  # FIXME: non-standard, but we need a way of showing that it is an estimate.
        'timestamp': self._parse_time_text(time_text),
    }

3349

3350

def _extract_comment_old(self, comment_renderer, parent=None):
    """Build a comment dict from a comment renderer; returns None when it has no commentId."""
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    author_url_path = ('authorEndpoint', (
        ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url')))

    text = self._get_text(comment_renderer, 'contentText')
    like_count = self._get_count(comment_renderer, 'voteCount')
    author_id = traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none}))
    author = self._get_text(comment_renderer, 'authorText')
    author_thumbnail = traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none}))

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    timestamp = self._parse_time_text(time_text)

    info = {
        'id': comment_id,
        'text': text,
        'like_count': like_count,
        'author_id': author_id,
        'author': author,
        'author_thumbnail': author_thumbnail,
        'parent': parent or 'root',
        # FIXME: non-standard, but we need a way of showing that it is an estimate.
        '_time_text': time_text,
        'timestamp': timestamp,
        'author_url': urljoin(
            'https://www.youtube.com',
            traverse_obj(comment_renderer, author_url_path, expected_type=str, get_all=False)),
    }

    # The remaining keys are only set when the renderer carries the information
    author_is_uploader = traverse_obj(comment_renderer, 'authorIsChannelOwner')
    if author_is_uploader is not None:
        info['author_is_uploader'] = author_is_uploader

    comment_abr = traverse_obj(
        comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
    if comment_abr is not None:
        info['is_favorited'] = 'creatorHeart' in comment_abr

    badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
    if self._has_badge(badges, BadgeType.VERIFIED):
        info['author_is_verified'] = True

    if traverse_obj(comment_renderer, 'pinnedCommentBadge'):
        info['is_pinned'] = True

    return info

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generate comment dicts by following comment continuations page by page.

    Calls itself for reply threads, passing the parent comment id and the same
    tracker dict so that counters and seen ids are shared across the recursion.

    @param root_continuation_data  renderer data the first continuation is taken from
    @param parent                  id of the parent comment when extracting replies, else None
    @param tracker                 shared state dict; created here when falsy
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Records the expected comment count in the tracker and returns the
        # continuation for the configured sort order (None if not found).
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count is not None:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents, entity_payloads):
        # Yields comment dicts. A bare `yield` (None) is a stop signal: the
        # consuming loop further down returns as soon as it sees a falsy entry.
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])

            # old comment format
            if not entity_payloads:
                comment_renderer = get_first(
                    (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                    expected_type=dict, default={})

                comment = self._extract_comment_old(comment_renderer, parent)

            # new comment format
            else:
                view_model = (
                    traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
                    or traverse_obj(content, ('commentViewModel', {dict})))
                comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
                if not comment_keys:
                    continue
                # Keep only the entity payloads whose key belongs to this comment
                entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
                comment = self._extract_comment(entities, parent)
                if comment:
                    comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None

            if not comment:
                continue
            comment_id = comment['id']

            if comment.get('is_pinned'):
                tracker['pinned_comment_ids'].add(comment_id)
            # Sometimes YouTube may break and give us infinite looping comments.
            # See: https://github.com/yt-dlp/yt-dlp/issues/6290
            if comment_id in tracker['seen_comment_ids']:
                if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
                    # Pinned comments may appear a second time in newest first sort
                    # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                    continue
                self.report_warning(
                    'Detected YouTube comments looping. Stopping comment extraction '
                    f'{"for this thread" if parent else ""} as we probably cannot get any more.')
                yield
            else:
                tracker['seen_comment_ids'].add(comment['id'])

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by the per-thread limit and by what is left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=None,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0,
            seen_comment_ids=set(),
            pinned_comment_ids=set()
        )

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # The 'max_comments' argument supplies up to four limits; missing or
    # non-integer entries become sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    continuation_items_path = (
        'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
        # Ignore check if YouTube says the comment count is 0.
        check_get_keys = None
        if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
            check_get_keys = [[*continuation_items_path, ..., (
                'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=check_get_keys)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent:
                if self.get_param('ignoreerrors') in (True, 'only_download'):
                    self.report_warning(
                        'Received incomplete data for a comment reply thread and retrying did not help. '
                        'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
                    return
                else:
                    raise ExtractorError(
                        'Incomplete data received for comment reply thread. '
                        'Pass --ignore-errors to ignore and allow rest of comments to download.',
                        expected=True)
            raise
        is_forced_continuation = False
        continuation = None
        mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
        for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
            if is_first_continuation:
                # The first response only supplies the header; comments start on the next page
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items, mutations):
                # A falsy entry is the stop signal from extract_thread
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled

3601

3602

@staticmethod

3603

def _generate_comment_continuation(video_id):

3604

"""

3605

Generates initial comment section continuation token from given video id

3606

"""

3607

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

3608

return base64.b64encode(token.encode()).decode()

3609

3610

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comment entries, truncated to the
    `max_comments` extractor argument. `webpage` is accepted but not used here.
    """
    def _real_comment_extract(contents):
        # Pick the first item section that is marked as the comment section
        renderer = next((
            item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
            if item.get('sectionIdentifier') == 'comment-item-section'), None)
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # islice() treats a stop of None as "no limit"
    # NOTE(review): presumably int_or_none('') yields None for the unset default - confirm
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, max_comments)

3620

3621

@staticmethod

3622

def _get_checkok_params():

3623

return {'contentCheckOk': True, 'racyCheckOk': True}

3624

3625

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player request query

    @param sts  Signature timestamp; only included when it is not None
    @returns    dict with `playbackContext` plus the check-ok parameters
    """
    context = {
        'html5Preference': 'HTML5_PREF_WANTS',
    }
    if sts is not None:
        context['signatureTimestamp'] = sts
    return {
        'playbackContext': {
            'contentPlaybackContext': context
        },
        **cls._get_checkok_params()
    }

@staticmethod
def _is_agegated(player_response):
    """Return True if the playability status of `player_response` indicates an age gate"""
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')))
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers above are lower-case while status values are compared in
    # upper-case elsewhere in this extractor (e.g. 'UNPLAYABLE'), so match
    # case-insensitively; otherwise the status markers could never match.
    lowered_reasons = [reason.lower() for reason in reasons or () if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in lowered_reasons)

3650

3651

@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of `player_response` is 'UNPLAYABLE'"""
    return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'

3654

3655

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the `player` endpoint for `video_id` as the given client

    The signature timestamp is only looked up when `player_url` is given.
    `smuggled_data` is accepted but not used here.

    @returns  The response, or None if it is empty/falsy
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
    }

    # Optional user-supplied `player_params` extractor argument (case preserved)
    pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
    if pp_arg:
        yt_query['params'] = pp_arg

    yt_query.update(self._generate_player_context(sts))
    return self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
    ) or None

3678

3679

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the `player_client` extractor argument into an ordered client list

    'default' and 'all' are expanded, unsupported names are skipped with a
    warning, and android clients are moved to the end. For music URLs the
    `<client>_music` variants that exist in INNERTUBE_CLIENTS are appended.
    """
    requested_clients = []
    android_clients = []
    default = ['ios', 'web']
    # Clients starting with `_` cannot be requested explicitly
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        elif client not in allowed_clients:
            self.report_warning(f'Skipping unsupported client {client}')
        elif client.startswith('android'):
            android_clients.append(client)
        else:
            requested_clients.append(client)
    # Force deprioritization of broken Android clients for format de-duplication
    requested_clients.extend(android_clients)
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first so the list is not extended while it is being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3707

3708

def _invalid_player_response(self, pr, video_id):
    """Return the video id found in `pr` if it differs from `video_id`, else None

    Note that a response without any video id also yields a falsy result.
    """
    # YouTube may return a different video player response than expected.
    # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
    if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id:
        return pr_id
    return None

3713

3714

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for `video_id` from each requested client

    Responses for a different video id are skipped. Extra clients may be
    queued while iterating when a response is unplayable or age-gated.

    @returns  (list of player responses, player_url)
    @raises   ExtractorError if no usable player response was obtained
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    prs = []
    if initial_pr and not self._invalid_player_response(initial_pr, video_id):
        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        prs.append({**initial_pr, 'streamingData': None})

    all_clients = set(clients)
    # Reversed so that pop() below takes clients in their requested order
    clients = clients[::-1]

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    tried_iframe_fallback = False
    player_url = None
    skipped_clients = {}
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The player URL download is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            self.report_warning(e)
            continue

        if pr_id := self._invalid_player_response(pr, video_id):
            skipped_clients[client] = pr_id
        elif pr:
            # Save client name for introspection later
            name = short_client_name(client)
            sd = traverse_obj(pr, ('streamingData', {dict})) or {}
            sd[STREAMING_DATA_CLIENT_NAME] = name
            for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
                f[STREAMING_DATA_CLIENT_NAME] = name
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if skipped_clients:
        self.report_warning(
            f'Skipping player responses from {"/".join(skipped_clients)} clients '
            f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
        if not prs:
            raise ExtractorError(
                'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
    elif not prs:
        raise ExtractorError('Failed to extract any player response')
    return prs, player_url

3797

3798

def _needs_live_processing(self, live_status, duration):

3799

if (live_status == 'is_live' and self.get_param('live_from_start')

3800

or live_status == 'post_live' and (duration or 0) > 2 * 3600):

3801

return live_status

3802

3803

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generate format dicts from the given streaming data, followed by the subtitles

    Direct (https) formats are yielded first, then formats from the HLS and
    DASH manifests of each streaming data entry. The LAST item yielded is the
    merged subtitles dict, so callers must split it off from the formats.
    """
    CHUNK_SIZE = 10 << 20
    itags, stream_ids = collections.defaultdict(set), []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
    format_types = self._configuration_arg('formats')
    all_formats = 'duplicate' in format_types
    if self._configuration_arg('include_duplicate_formats'):
        all_formats = True
        self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
                                            'Use formats=duplicate extractor argument instead')

    def build_fragments(f):
        # One fragment per CHUNK_SIZE byte range of the file
        return LazyList({
            'url': update_url_query(f['url'], {
                'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}'
            })
        } for range_start in range(0, f['filesize'], CHUNK_SIZE))

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if not all_formats:
            if stream_id in stream_ids:
                continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit None value for the key
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher') or '')
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                throttled = True

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_call(lambda: format_duration < duration // 2)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)

        client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
        # Android client formats are broken due to integrity check enforcement
        # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
        is_broken = client_name and client_name.startswith(short_client_name('android'))
        if is_broken:
            self.report_warning(
                f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
                'They will be deprioritized', only_once=True)

        # `quality` may still be None when the format carries no quality information at all
        name = fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '') or ''
        fps = int_or_none(fmt.get('fps')) or 0
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                join_nonempty(audio_track.get('displayName'),
                              language_preference > 0 and ' (default)', delim=''),
                name, fmt.get('isDrc') and 'DRC',
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', is_broken and 'BROKEN',
                (self.get_param('verbose') or all_formats) and client_name,
                delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
                                  + (100 if 'Premium' in name else 0)),
            'fps': fps if fps > 1 else None,  # For some formats, fps is wrongly returned as 1
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'filesize_approx': filesize_from_tbr(tbr, format_duration),
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else '') or None,
            'language_preference': language_preference,
            # Strictly de-prioritize broken, damaged and 3gp formats
            'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
        if single_stream and dct.get('ext'):
            dct['container'] = dct['ext'] + '_dash'

        if (all_formats or 'dashy' in format_types) and dct['filesize']:
            yield {
                **dct,
                'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
                'protocol': 'http_dash_segments',
                'fragments': build_fragments(dct),
            }
        if all_formats or 'dashy' not in format_types:
            dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
            yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = 'incomplete' not in format_types
    if self._configuration_arg('include_incomplete_formats'):
        skip_bad_formats = False
        self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, client_name, itag):
        """Fill in manifest format `f` in place; return False if it is a duplicate to be dropped"""
        key = (proto, f.get('language'))
        if not all_formats and key in itags[itag]:
            return False
        itags[itag].add(key)

        if itag and all_formats:
            f['format_id'] = f'{itag}-{proto}'
        elif any(p != proto for p, _ in itags[itag]):
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        if f.get('source_preference') is None:
            f['source_preference'] = -1

        if itag in ('616', '235'):
            f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
            f['source_preference'] += 100

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality recorded for the closest known height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        if self.get_param('verbose') or all_formats:
            f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
        if f.get('fps') and f['fps'] <= 1:
            del f['fps']

        if proto == 'hls' and f.get('has_drm'):
            f['has_drm'] = 'maybe'
            f['source_preference'] -= 5
        return True

    subtitles = {}
    for sd in streaming_data:
        client_name = sd.get(STREAMING_DATA_CLIENT_NAME)

        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', client_name, self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', client_name, f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    yield subtitles

def _extract_storyboard(self, player_responses, duration):
    """Generate storyboard (mhtml) format dicts from the storyboard spec

    The spec is a `|`-separated string: the first part is the base URL and
    each remaining part is a `#`-separated description of one storyboard.
    Nothing is yielded if there is no base URL or no usable duration.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # fps and fragment durations below are computed by dividing with
        # `duration`; without it (None or 0) they cannot be derived
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

4100

4101

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses

    @returns  (webpage, master_ytcfg, player_responses, player_url);
              webpage is None when 'webpage' is in the `player_skip` argument
    """
    webpage = None
    if 'webpage' not in self._configuration_arg('player_skip'):
        query = {'bpctr': '9999999999', 'has_verified': '1'}
        pp = self._configuration_arg('player_params', [None], casesense=True)[0]
        if pp:
            query['pp'] = pp
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=query)

    # Fall back to the default ytcfg when none can be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    player_responses, player_url = self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url

4118

4119

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status and extract the formats and subtitles

    @returns  (live_broadcast_details, live_status, streaming_data, formats, subtitles)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')
    live_status = ('post_live' if post_live
                   else 'is_live' if is_live
                   else 'is_upcoming' if is_upcoming
                   else 'was_live' if live_content
                   else 'not_live' if False in (is_live, live_content)
                   else None)
    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The generator yields all formats first and the subtitles dict as its last item
    *formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration)
    if all(f.get('has_drm') for f in formats):
        # If there are no formats that definitely don't have DRM, all have DRM
        for f in formats:
            f['has_drm'] = True

    return live_broadcast_details, live_status, streaming_data, formats, subtitles

4141

4142

def _real_extract(self, url):

4143

url, smuggled_data = unsmuggle_url(url, {})

4144

video_id = self._match_id(url)

4145

4146

base_url = self.http_scheme() + '//www.youtube.com/'

4147

webpage_url = base_url + 'watch?v=' + video_id

4148

4149

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

4150

4151

playability_statuses = traverse_obj(

4152

player_responses, (..., 'playabilityStatus'), expected_type=dict)

4153

4154

trailer_video_id = get_first(

4155

playability_statuses,

4156

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

4157

expected_type=str)

4158

if trailer_video_id:

4159

return self.url_result(

4160

trailer_video_id, self.ie_key(), trailer_video_id)

4161

4162

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

4163

if webpage else (lambda x: None))

4164

4165

video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)

4166

microformats = traverse_obj(

4167

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

4168

expected_type=dict)

4169

4170

translated_title = self._get_text(microformats, (..., 'title'))

4171

video_title = (self._preferred_lang and translated_title

4172

or get_first(video_details, 'title') # primary

4173

or translated_title

4174

or search_meta(['og:title', 'twitter:title', 'title']))

4175

translated_description = self._get_text(microformats, (..., 'description'))

4176

original_description = get_first(video_details, 'shortDescription')

4177

video_description = (

4178

self._preferred_lang and translated_description

4179

# If original description is blank, it will be an empty string.

4180

# Do not prefer translated description in this case.

4181

or original_description if original_description is not None else translated_description)

4182

4183

multifeed_metadata_list = get_first(

4184

player_responses,

4185

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

4186

expected_type=str)

4187

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

4188

if self.get_param('noplaylist'):

4189

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

4194

# Unquote should take place before split on comma (,) since textual

4195

# fields may contain comma as well (see

4196

# https://github.com/ytdl-org/youtube-dl/issues/8536)

4197

feed_data = urllib.parse.parse_qs(

4198

urllib.parse.unquote_plus(feed))

4199

4200

def feed_entry(name):

4201

return try_get(

4202

feed_data, lambda x: x[name][0], str)

4203

4204

feed_id = feed_entry('id')

4205

if not feed_id:

4206

continue

4207

feed_title = feed_entry('title')

4208

title = video_title

4209

if feed_title:

4210

title += ' (%s)' % feed_title

4211

entries.append({

4212

'_type': 'url_transparent',

4213

'ie_key': 'Youtube',

4214

'url': smuggle_url(

4215

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

4216

{'force_singlefeed': True}),

4217

'title': title,

4218

})

4219

feed_ids.append(feed_id)

4220

self.to_screen(

4221

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

4222

% (', '.join(feed_ids), video_id))

4223

return self.playlist_result(

4224

entries, video_id, video_title, video_description)

4225

4226

duration = (int_or_none(get_first(video_details, 'lengthSeconds'))

4227

or int_or_none(get_first(microformats, 'lengthSeconds'))

4228

or parse_duration(search_meta('duration')) or None)

4229

4230

live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \

4231

self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

4232

if live_status == 'post_live':

4233

self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

4234

4235

if not formats:

4236

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

4237

self.report_drm(video_id)

4238

pemr = get_first(

4239

playability_statuses,

4240

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

4241

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

4242

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

4243

if subreason:

4244

if subreason == 'The uploader has not made this video available in your country.':

4245

countries = get_first(microformats, 'availableCountries')

4246

if not countries:

4247

regions_allowed = search_meta('regionsAllowed')

4248

countries = regions_allowed.split(',') if regions_allowed else None

4249

self.raise_geo_restricted(subreason, countries, metadata_available=True)

4250

reason += f'. {subreason}'

4251

if reason:

4252

self.raise_no_formats(reason, expected=True)

4253

4254

keywords = get_first(video_details, 'keywords', expected_type=list) or []

4255

if not keywords and webpage:

4256

keywords = [

4257

unescapeHTML(m.group('content'))

4258

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

4259

for keyword in keywords:

4260

if keyword.startswith('yt:stretch='):

4261

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

4262

if mobj:

4263

# NB: float is intentional for forcing float division

4264

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

4269

f['stretched_ratio'] = ratio

4270

break

4271

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

4272

thumbnail_url = search_meta(['og:image', 'twitter:image'])

4273

if thumbnail_url:

4274

thumbnails.append({

4275

'url': thumbnail_url,

4276

})

4277

original_thumbnails = thumbnails.copy()

4278

4279

# The best resolution thumbnails sometimes does not appear in the webpage

4280

# See: https://github.com/yt-dlp/yt-dlp/issues/340

4281

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

4282

thumbnail_names = [

4283

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

4284

# in resolution, these are not the custom thumbnail. So de-prioritize them

4285

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

4286

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

4287

]

4288

n_thumbnail_names = len(thumbnail_names)

4289

thumbnails.extend({

4290

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

4291

video_id=video_id, name=name, ext=ext,

4292

webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),

4293

} for name in thumbnail_names for ext in ('webp', 'jpg'))

4294

for thumb in thumbnails:

4295

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

4296

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

4297

self._remove_duplicate_formats(thumbnails)

4298

self._downloader._sort_thumbnails(original_thumbnails)

4299

4300

category = get_first(microformats, 'category') or search_meta('genre')

4301

channel_id = self.ucid_or_none(str_or_none(

4302

get_first(video_details, 'channelId')

4303

or get_first(microformats, 'externalChannelId')

4304

or search_meta('channelId')))

4305

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

4306

4307

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

4308

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

4309

if not duration and live_end_time and live_start_time:

4310

duration = live_end_time - live_start_time

4311

4312

needs_live_processing = self._needs_live_processing(live_status, duration)

4313

4314

def is_bad_format(fmt):

4315

if needs_live_processing and not fmt.get('is_from_start'):

4316

return True

4317

elif (live_status == 'is_live' and needs_live_processing != 'is_live'

4318

and fmt.get('protocol') == 'http_dash_segments'):

4319

return True

4320

4321

for fmt in filter(is_bad_format, formats):

4322

fmt['preference'] = (fmt.get('preference') or -1) - 10

4323

fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')

4324

4325

if needs_live_processing:

4326

self._prepare_live_from_start_formats(

4327

formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

4328

4329

formats.extend(self._extract_storyboard(player_responses, duration))

4330

4331

channel_handle = self.handle_from_url(owner_profile_url)

info = {

'id': video_id,

'title': video_title,

4336

'formats': formats,

4337

'thumbnails': thumbnails,

4338

# The best thumbnail that we are sure exists. Prevents unnecessary

4339

# URL checking if the user doesn't care about getting the best possible thumbnail

4340

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

4341

'description': video_description,

4342

'channel_id': channel_id,

4343

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),

4344

'duration': duration,

4345

'view_count': int_or_none(

4346

get_first((video_details, microformats), (..., 'viewCount'))

4347

or search_meta('interactionCount')),

4348

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

4349

'age_limit': 18 if (

4350

get_first(microformats, 'isFamilySafe') is False

4351

or search_meta('isFamilyFriendly') == 'false'

4352

or search_meta('og:restrictions:age') == '18+') else 0,

4353

'webpage_url': webpage_url,

4354

'categories': [category] if category else None,

4355

'tags': keywords,

4356

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

4357

'live_status': live_status,

4358

'release_timestamp': live_start_time,

4359

'_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats

4360

'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')

}

subtitles = {}

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

4365

if pctr:

4366

def get_lang_code(track):

4367

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

4368

or track.get('languageCode'))

4369

4370

# Converted into dicts to remove duplicates

4371

captions = {

4372

get_lang_code(sub): sub

4373

for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}

4374

translation_languages = {

4375

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

4376

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}

4377

4378

def process_language(container, base_url, lang_code, sub_name, query):

4379

lang_subs = container.setdefault(lang_code, [])

4380

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

# NB: Constructing the full subtitle dictionary is slow

4391

get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (

4392

self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

4393

for lang_code, caption_track in captions.items():

4394

base_url = caption_track.get('baseUrl')

4395

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

4396

if not base_url:

4397

continue

4398

lang_name = self._get_text(caption_track, 'name', max_runs=1)

4399

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

4404

if not caption_track.get('isTranslatable'):

4405

continue

4406

for trans_code, trans_name in translation_languages.items():

4407

if not trans_code:

4408

continue

4409

orig_trans_code = trans_code

4410

if caption_track.get('kind') != 'asr' and trans_code != 'und':

4411

if not get_translated_subs:

4412

continue

4413

trans_code += f'-{lang_code}'

4414

trans_name += format_field(lang_name, None, ' from %s')

4415

if lang_code == f'a-{orig_trans_code}':

4416

# Set audio language based on original subtitles

4417

for f in formats:

4418

if f.get('acodec') != 'none' and not f.get('language'):

4419

f['language'] = orig_trans_code

4420

# Add an "-orig" label to the original language so that it can be distinguished.

4421

# The subs are returned without "-orig" as well for compatibility

4422

process_language(

4423

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

4424

# Setting tlang=lang returns damaged subtitles.

4425

process_language(automatic_captions, base_url, trans_code, trans_name,

4426

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

4427

4428

info['automatic_captions'] = automatic_captions

4429

info['subtitles'] = subtitles

4430

4431

parsed_url = urllib.parse.urlparse(url)

4432

for component in [parsed_url.fragment, parsed_url.query]:

4433

query = urllib.parse.parse_qs(component)

4434

for k, v in query.items():

4435

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

4436

d_k += '_time'

4437

if d_k not in info and k in s_ks:

4438

info[d_k] = parse_duration(query[k][0])

4439

4440

# Youtube Music Auto-generated description

4441

if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):

4442

# XXX: Causes catastrophic backtracking if description has "·"

4443

# E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI

4444

# Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x

4445

# reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2

4446

mobj = re.search(

4447

r'''(?xs)

4448

(?=(?P<track>[^\n·]+))(?P=track)·

4449

(?=(?P<artist>[^\n]+))(?P=artist)\n+

4450

(?=(?P<album>[^\n]+))(?P=album)\n

4451

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

4452

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

4453

(.+?\nArtist\s*:\s*

4454

(?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n

4455

)?.+\nAuto-generated\ by\ YouTube\.\s*$

4456

''', video_description)

4457

if mobj:

4458

release_year = mobj.group('release_year')

4459

release_date = mobj.group('release_date')

4460

if release_date:

4461

release_date = release_date.replace('-', '')

4462

if not release_year:

4463

release_year = release_date[:4]

4464

info.update({

4465

'album': mobj.group('album'.strip()),

4466

'artists': ([a] if (a := mobj.group('clean_artist'))

4467

else [a.strip() for a in mobj.group('artist').split('·')]),

4468

'track': mobj.group('track').strip(),

4469

'release_date': release_date,

4470

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

4476

if not traverse_obj(initial_data, 'contents'):

4477

self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')

4478

initial_data = None

4479

if not initial_data:

4480

query = {'videoId': video_id}

4481

query.update(self._get_checkok_params())

4482

initial_data = self._extract_response(

4483

item_id=video_id, ep='next', fatal=False,

4484

ytcfg=master_ytcfg, query=query, check_get_keys='contents',

4485

headers=self.generate_api_headers(ytcfg=master_ytcfg),

4486

note='Downloading initial data API JSON')

4487

4488

info['comment_count'] = traverse_obj(initial_data, (

4489

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

4490

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount'

4491

), (

4492

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

4493

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo'

4494

), expected_type=self._get_count, get_all=False)

4495

4496

try: # This will error if there is no livechat

4497

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

4498

except (KeyError, IndexError, TypeError):

4499

pass

4500

else:

4501

info.setdefault('subtitles', {})['live_chat'] = [{

4502

# url is needed to set cookies

4503

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

4504

'video_id': video_id,

4505

'ext': 'json',

4506

'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')

4507

else 'youtube_live_chat_replay'),

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

4513

or self._extract_chapters_from_engagement_panel(initial_data, duration)

4514

or self._extract_chapters_from_description(video_description, duration)

4515

or None)

4516

4517

info['heatmap'] = self._extract_heatmap(initial_data)

4518

4519

contents = traverse_obj(

4520

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

4521

expected_type=list, default=[])

4522

4523

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

4524

if vpir:

4525

stl = vpir.get('superTitleLink')

4526

if stl:

4527

stl = self._get_text(stl)

4528

if try_get(

4529

vpir,

4530

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

4531

info['location'] = stl

4532

else:

4533

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

4534

if mobj:

4535

info.update({

4536

'series': mobj.group(1),

4537

'season_number': int(mobj.group(2)),

4538

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

list) or []):

tbrs = variadic(

traverse_obj(

tlb, ('toggleButtonRenderer', ...),

4547

('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))

4548

for tbr in tbrs:

4549

for getter, regex in [(

4550

lambda x: x['defaultText']['accessibility']['accessibilityData'],

4551

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

4552

lambda x: x['accessibility'],

4553

lambda x: x['accessibilityData']['accessibilityData'],

4554

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

4555

label = (try_get(tbr, getter, dict) or {}).get('label')

4556

if label:

4557

mobj = re.match(regex, label)

4558

if mobj:

4559

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

4560

break

4561

4562

info['like_count'] = traverse_obj(vpir, (

4563

'videoActions', 'menuRenderer', 'topLevelButtons', ...,

4564

'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',

4565

'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',

4566

'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)

4567

4568

vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))

4569

if vcr:

4570

vc = self._get_count(vcr, 'viewCount')

4571

# Upcoming premieres with waiting count are treated as live here

4572

if vcr.get('isLive'):

4573

info['concurrent_view_count'] = vc

4574

elif info.get('view_count') is None:

4575

info['view_count'] = vc

4576

4577

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

4578

if vsir:

4579

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

4580

info.update({

4581

'channel': self._get_text(vor, 'title'),

4582

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

4583

4584

if not channel_handle:

4585

channel_handle = self.handle_from_url(

4586

traverse_obj(vor, (

4587

('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),

4588

(('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),

4589

{str}), get_all=False))

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

4594

list) or []

4595

multiple_songs = False

4596

for row in rows:

4597

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

4598

multiple_songs = True

4599

break

4600

for row in rows:

4601

mrr = row.get('metadataRowRenderer') or {}

4602

mrr_title = mrr.get('title')

4603

if not mrr_title:

4604

continue

4605

mrr_title = self._get_text(mrr, 'title')

4606

mrr_contents_text = self._get_text(mrr, ('contents', 0))

4607

if mrr_title == 'License':

4608

info['license'] = mrr_contents_text

4609

elif not multiple_songs:

4610

if mrr_title == 'Album':

4611

info['album'] = mrr_contents_text

4612

elif mrr_title == 'Artist':

4613

info['artists'] = [mrr_contents_text] if mrr_contents_text else None

4614

elif mrr_title == 'Song':

4615

info['track'] = mrr_contents_text

4616

owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))

4617

if self._has_badge(owner_badges, BadgeType.VERIFIED):

4618

info['channel_is_verified'] = True

4619

4620

info.update({

4621

'uploader': info.get('channel'),

4622

'uploader_id': channel_handle,

4623

'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),

4624

})

4625

# The upload date for scheduled, live and past live streams / premieres in microformats

4626

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

4627

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

4628

upload_date = (

4629

unified_strdate(get_first(microformats, 'uploadDate'))

4630

or unified_strdate(search_meta('uploadDate')))

4631

if not upload_date or (

4632

live_status in ('not_live', None)

4633

and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])

4634

):

4635

upload_date = strftime_or_none(

4636

self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date

4637

info['upload_date'] = upload_date

4638

4639

if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):

4640

# Newly uploaded videos' HLS formats are potentially problematic and need to be checked

4641

upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc)

4642

if upload_datetime >= datetime_from_str('today-2days'):

4643

for fmt in info['formats']:

4644

if fmt.get('protocol') == 'm3u8_native':

4645

fmt['__needs_testing'] = True

4646

4647

for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

badges = self._extract_badges(traverse_obj(vpir, 'badges'))

4653

4654

is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)

4655

or get_first(video_details, 'isPrivate', expected_type=bool))

4656

4657

info['availability'] = (

4658

'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)

4659

else self._availability(

4660

is_private=is_private,

4661

needs_premium=(

4662

self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)

4663

or False if initial_data and is_private is not None else None),

4664

needs_subscription=(

4665

self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)

4666

or False if initial_data and is_private is not None else None),

4667

needs_auth=info['age_limit'] >= 18,

4668

is_unlisted=None if is_private is None else (

4669

self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)

4670

or get_first(microformats, 'isUnlisted', expected_type=bool))))

4671

4672

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

4673

4674

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

4680

@staticmethod
def passthrough_smuggled_data(func):
    """Decorate ``func(self, url, smuggled_data)`` so smuggled data survives extraction.

    The returned wrapper has the signature ``wrapper(self, url)``. It unsmuggles
    *url*, flags the data with ``is_music_url`` when ``self.is_music_url(url)``
    is truthy, calls *func*, and then re-attaches the smuggled data to the
    result (and to each of its ``entries``) when they are ``url`` /
    ``url_transparent`` results.
    """

    def _smuggle(info, smuggled_data):
        # Only URL-type results are handed to another extractor, so only those
        # need the data carried along.
        if info.get('_type') not in ('url', 'url_transparent'):
            return info
        if smuggled_data.get('is_music_url'):
            parsed_url = urllib.parse.urlparse(info['url'])
            if parsed_url.netloc in ('www.youtube.com', 'music.youtube.com'):
                # The music flag is encoded in the host itself, so it no longer
                # has to be smuggled. NB: this mutates the dict passed in.
                smuggled_data.pop('is_music_url')
                info['url'] = urllib.parse.urlunparse(parsed_url._replace(netloc='music.youtube.com'))
        if smuggled_data:
            info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = func(self, url, smuggled_data)
        if smuggled_data:
            _smuggle(info_dict, smuggled_data)
            if info_dict.get('entries'):
                # Lazy on purpose: entries may be a generator. Each entry gets its
                # own copy because _smuggle() may pop keys from the dict it is given.
                info_dict['entries'] = (_smuggle(i, smuggled_data.copy()) for i in info_dict['entries'])
        return info_dict

    return wrapper

@staticmethod
def _extract_basic_item_renderer(item):
    """Return the first recognised renderer dict contained in *item*.

    A value qualifies when it is a dict and its key is either one of the known
    basic renderer names or has the form ``grid...Renderer``. Returns ``None``
    when nothing in *item* qualifies.
    """
    # Modified from _extract_grid_item_renderer
    known_basic_renderers = (
        'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer',
    )
    for key, renderer in item.items():
        if not isinstance(renderer, dict):
            continue
        if key in known_basic_renderers or (key.startswith('grid') and key.endswith('Renderer')):
            return renderer
    return None

def _extract_channel_renderer(self, renderer):
    """Build a ``url``-type result dict for the channel described by *renderer*.

    *renderer* must contain ``channelId`` (a ``KeyError`` is raised otherwise).
    The handle is taken from the navigation endpoint and, failing that, from
    ``subscriberCountText``.
    """
    channel_id = self.ucid_or_none(renderer['channelId'])
    title = self._get_text(renderer, 'title')
    channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)
    channel_handle = self.handle_from_url(
        traverse_obj(renderer, (
            'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
                                   ('browseEndpoint', 'canonicalBaseUrl')),
            {str}), get_all=False))
    if not channel_handle:
        # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
        channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))
    return {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': title,
        'uploader': title,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': title,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        # See above. YouTube sets videoCountText to the subscriber text in search channel renderers.
        # However, in feed/channels this is set correctly to the subscriber count
        'channel_follower_count': traverse_obj(
            renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count),
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        'playlist_count': (
            # videoCountText may be the subscriber count
            self._get_count(renderer, 'videoCountText')
            if self._get_count(renderer, 'subscriberCountText') is not None else None),
        'description': self._get_text(renderer, 'descriptionSnippet'),
        'channel_is_verified': True if self._has_badge(
            self._extract_badges(traverse_obj(renderer, 'ownerBadges')), BadgeType.VERIFIED) else None,
    }

def _grid_entries(self, grid_renderer):
    """Yield one entry per usable item in ``grid_renderer['items']``.

    Each item is resolved, in order of precedence, as a playlist
    (``playlistId``), a video (``videoId``), a channel (``channelId``) or a
    generic navigation-endpoint URL matched against the known extractors.
    Items that are not dicts or carry no recognised renderer are skipped.
    A missing or empty ``items`` key yields nothing.
    """
    for item in grid_renderer.get('items') or []:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        # playlist
        playlist_id = renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                f'https://www.youtube.com/playlist?list={playlist_id}',
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
            continue
        # video
        video_id = renderer.get('videoId')
        if video_id:
            yield self._extract_video(renderer)
            continue
        # channel
        channel_id = renderer.get('channelId')
        if channel_id:
            yield self._extract_channel_renderer(renderer)
            continue
        # generic endpoint URL support
        ep_url = urljoin('https://www.youtube.com/', try_get(
            renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            str))
        if ep_url:
            # First suitable extractor wins
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if ie.suitable(ep_url):
                    yield self.url_result(
                        ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                    break

def _music_reponsive_list_entry(self, renderer):
    """Turn a music list item *renderer* into a ``url_result``, or return None.

    Resolution order: a playlist item video (``playlistItemData.videoId``),
    then a watch endpoint with a playlist (optionally pinned to a video), then
    a browse endpoint. ``None`` is returned when none of these are present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        title = traverse_obj(renderer, (
            'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
            'text', 'runs', 0, 'text'))
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id, title=title)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, (*watch_endpoint, 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, (*watch_endpoint, 'videoId'))
        if video_id:
            return self.url_result(
                f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}',
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
        return self.url_result(
            f'https://music.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None

def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the entries embedded directly in a shelf's ``content``.

    Only ``gridRenderer`` and ``expandedShelfContentsRenderer`` content is
    handled; both are delegated to ``_grid_entries``. Nothing is yielded when
    ``content`` is missing or not a dict.
    """
    content = shelf_renderer.get('content')
    if not isinstance(content, dict):
        return
    renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
    if renderer:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        yield from self._grid_entries(renderer)
    # TODO: support content['horizontalListRenderer'] (currently ignored)

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf: its own URL (if any), then its embedded content.

    When *skip_channels* is true and the shelf URL contains ``/channels?``,
    the generator stops without yielding anything for this shelf.
    """
    ep = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', ep)
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

def _playlist_entries(self, video_list_renderer):
    """Yield an extracted video for each playlist item in ``contents``.

    An item is used when it is a dict holding a ``playlistVideoRenderer`` or
    ``playlistPanelVideoRenderer`` dict with a truthy ``videoId``; everything
    else is skipped. A missing or empty ``contents`` key yields nothing.
    """
    for content in video_list_renderer.get('contents') or []:
        if not isinstance(content, dict):
            continue
        renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
        if not isinstance(renderer, dict):
            continue
        if not renderer.get('videoId'):
            continue
        yield self._extract_video(renderer)

def _rich_entries(self, rich_grid_renderer):
    """Yield at most one entry for a rich item: a video or a playlist link.

    The renderer is looked up under ``content`` as ``videoRenderer``,
    ``reelItemRenderer`` or ``playlistRenderer``. A ``videoId`` takes
    precedence over a ``playlistId``; nothing is yielded when neither is set.
    """
    renderer = traverse_obj(
        rich_grid_renderer,
        ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}
    video_id = renderer.get('videoId')
    if video_id:
        yield self._extract_video(renderer)
        return
    playlist_id = renderer.get('playlistId')
    if playlist_id:
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=self._get_text(renderer, 'title'))

def _video_entry(self, video_renderer):
    """Return the extracted video for *video_renderer*, or None without a ``videoId``."""
    if video_renderer.get('videoId'):
        return self._extract_video(video_renderer)
    return None

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a ``url_result`` for the tile's tap-command URL, or None if it has none."""
    url = urljoin('https://youtube.com', traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url')))
    if url:
        return self.url_result(
            url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
    return None

def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a community post.

    Yields, in order: the attached video (if any), the attached playlist (if
    any), and every inline link in the post text that ``YoutubeIE`` can handle,
    except a link to the already-yielded attached video. Nothing is yielded
    when the post has no ``backstagePostRenderer``.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = video_renderer.get('videoId')
    if video_id:
        entry = self._extract_video(video_renderer)
        if entry:
            yield entry
    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # Avoid duplicating the attached video
        if video_id == ep_video_id:
            continue
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)

def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield entries from a community-post continuation's ``contents`` list.

    Each dict item is handled as a ``backstagePostThreadRenderer`` (delegated
    to ``_post_thread_entries``) or, failing that, as a ``videoRenderer``.
    Non-dict items and videos for which ``_video_entry`` returns nothing are
    skipped. Nothing is yielded when ``contents`` is not a list.
    """
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        if not isinstance(content, dict):
            continue
        renderer = content.get('backstagePostThreadRenderer')
        if isinstance(renderer, dict):
            yield from self._post_thread_entries(renderer)
            continue
        renderer = content.get('videoRenderer')
        if isinstance(renderer, dict):
            entry = self._video_entry(renderer)
            # _video_entry returns None when the renderer has no videoId
            if entry:
                yield entry

r''' # unused

4939

def _rich_grid_entries(self, contents):

4940

for content in contents:

4941

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4942

if video_renderer:

4943

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _report_history_entries(self, renderer):
    """Yield a ``YoutubeIE`` url_result for every linked cell in a report-history table."""
    for url in traverse_obj(renderer, (
            'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
            'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
            'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')):
        yield self.url_result(urljoin('https://www.youtube.com', url), YoutubeIE)

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield every entry found under ``parent_renderer['contents']``.

    *continuation_list* is an out-parameter: it is reset to ``[None]`` when the
    generator starts and ``continuation_list[0]`` is updated in place with the
    result of ``_extract_continuation`` as renderers are processed, so the
    caller must read it only after consuming the generator.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Dispatch table: renderer key -> callable returning an iterable of entries.
    # It does not depend on the item being processed, so it is built once.
    known_renderers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
        'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
    }

    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            if content.get('richItemRenderer'):
                yield from self._rich_entries(content['richItemRenderer'])
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue
            # Only the first known renderer of each item is processed
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        # Fall back to a continuation on the section itself
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # Last resort: a continuation on the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield the entries of a tab, then keep requesting continuation pages.

    The first batch comes from the tab's own 'sectionListRenderer' or
    'richGridRenderer'. Further pages are fetched with `_extract_response`
    for as long as a continuation is available, the continuation token has
    not been seen before and the response contains a known renderer.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Reads `continuation_list` at call time, so it always refers to
        # the list most recently bound in this method
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return

    parent_renderer = {}
    for renderer_key in ('sectionListRenderer', 'richGridRenderer'):
        candidate = try_get(tab_content, lambda x: x[renderer_key], dict)
        if candidate:
            parent_renderer = candidate
            break
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Maps the first key of a continuation item to (extractor, key under
    # which the items have to be wrapped before calling the extractor)
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }

    seen_continuations = set()
    page_num = 0
    while continuation:
        page_num += 1
        token = continuation.get('continuation')
        if token is not None and token in seen_continuations:
            self.write_debug('Detected YouTube feed looping - assuming end of feed.')
            break
        seen_continuations.add(token)

        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item:
            handler = known_renderers.get(key)
            if handler is None:
                continue
            func, parent_key = handler
            if parent_key:
                video_items_renderer = {parent_key: continuation_items}
            else:
                video_items_renderer = continuation_items
            # Fresh holder so a continuation from a previous page is not reused
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        if not video_items_renderer:
            break

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

5079

for tab_renderer in tabs:

5080

if tab_renderer.get('selected'):

5081

return tab_renderer

5082

if fatal:

5083

raise ExtractorError('Unable to find selected tab')

5084

5085

@staticmethod
def _extract_tab_renderers(response):
    """
    Collect the 'tabRenderer' / 'expandableTabRenderer' dicts found under
    contents -> twoColumnBrowseResultsRenderer -> tabs of `response`.
    """
    tab_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_path, expected_type=dict)

5089

5090

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build a playlist result for the selected tab of a tabbed page.

    The metadata title is suffixed with the selected tab's 'title' and
    'expandedText' fields (formatted as ' - %s'), and the entries come
    from `_entries` run on the selected tab.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)
    selected_tab = self._extract_selected_tab(tabs)

    for title_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, title_field, ' - %s')

    entries = self._entries(
        selected_tab, metadata['id'], ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

5103

5104

def _extract_metadata_from_tabs(self, item_id, data):
    """
    Assemble the playlist/channel level info dict from a browse response.

    Starts from {'id': item_id} and fills in channel, title, availability,
    description, tags, thumbnails, uploader, counts and dates from the
    channel or playlist metadata renderer, the header and the sidebar.
    """
    info = {'id': item_id}

    meta = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if meta:
        channel_id = traverse_obj(
            meta, ('externalId', {self.ucid_or_none}), ('channelUrl', {self.ucid_from_url}))
        info['channel'] = meta.get('title')
        info['channel_id'] = channel_id
        if channel_id:
            info['id'] = channel_id
    else:
        meta = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _append_uncropped(thumbnails, thumbnail_id, preference):
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference,
            })

    avatar_thumbnails = self._extract_thumbnails(meta, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer,
        ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    title = (
        traverse_obj(meta, 'title')
        or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
        or info['id'])
    info['title'] = title
    info['availability'] = self._extract_availability(data)
    info['channel_follower_count'] = self._get_count(data, ('header', ..., 'subscriberCountText'))
    info['description'] = try_get(meta, lambda x: x.get('description', ''))
    info['tags'] = (
        traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}))
        or traverse_obj(meta, ('keywords', {lambda x: x and shlex.split(x)}, ...)))
    info['thumbnails'] = (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners

    channel_handle = (
        traverse_obj(meta, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
        or traverse_obj(data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False))
    if channel_handle:
        info['uploader_id'] = channel_handle
        info['uploader_url'] = format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None)

    channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
    if self._has_badge(channel_badges, BadgeType.VERIFIED):
        info['channel_is_verified'] = True

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_unix = self._parse_time_text(
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(last_updated_unix)

    view_count = self._get_count(playlist_stats, 1)
    if view_count is None:  # 0 is allowed
        view_count = self._get_count(playlist_header_renderer, 'viewCountText')
    if view_count is None:
        view_count = self._get_count(data, (
            'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
            'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))
    info['view_count'] = view_count

    playlist_count = self._get_count(playlist_stats, 0)
    if playlist_count is None:  # 0 is allowed
        playlist_count = self._get_count(
            playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
    info['playlist_count'] = playlist_count

    if not info.get('channel_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info['channel'] = self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text)
        info['channel_id'] = self.ucid_or_none(browse_ep.get('browseId'))
        info['uploader_id'] = self.handle_from_url(
            urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl')))

    info['uploader'] = info['channel']
    info['channel_url'] = format_field(
        info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None)
    info['uploader_url'] = format_field(
        info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None)

    return info

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of an inline (watch page) playlist, page by page.

    Each page is parsed with `_playlist_entries`; entries up to and
    including the last id already yielded are skipped. The next page is
    requested from the 'next' endpoint using the watch endpoint of the
    last panel video, falling back to the last video id, the number of
    videos on the page and a default 'params' value.
    Stops when a page has no videos or contains nothing new.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last item may lack a playlistPanelVideoRenderer/watchEndpoint;
        # fall back to an empty dict so the per-field defaults below apply
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D',
        }
        response = self._extract_response(
            item_id=f'{playlist_id} page {page_num}',
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents',
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

5257

5258

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return a result for a playlist embedded in a watch page.

    If the playlist links to a page other than `url` and its id is not a
    known-unviewable one, the URL is delegated to YoutubeTabIE. Otherwise
    the entries are taken from the inline playlist.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, relative_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not should_delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

5280

5281

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns 'public' if a public badge, header privacy or privacy icon says so,
             otherwise whatever `_availability` derives from the private/unlisted/
             subscription/premium signals gathered here
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    # Non-badge signals, in order of preference: header privacy, then the
    # privacy dropdown icon, then (for unlisted only) the microformat flag.
    # None means "unknown".
    if player_header_privacy is not None:
        stated_private = player_header_privacy == 'PRIVATE'
        stated_unlisted = player_header_privacy == 'UNLISTED'
    elif privacy_setting_icon is not None:
        stated_private = privacy_setting_icon == 'PRIVACY_PRIVATE'
        stated_unlisted = privacy_setting_icon == 'PRIVACY_UNLISTED'
    else:
        stated_private = None
        stated_unlisted = microformats_is_unlisted

    # A badge must count on its own. Written as a single expression,
    # `badge or x == 'PRIVATE' if x is not None else ...` parses as
    # `(badge or x == 'PRIVATE') if x is not None else ...`, which drops
    # the badge whenever the header privacy is missing.
    return self._availability(
        is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or stated_private,
        is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or stated_unlisted,
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value of type `expected_type`
    among the playlist sidebar items of `data`, or None if there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Returns None without any request when `data` has neither a
    playlistMetadataRenderer nor a playlistHeaderRenderer. The request
    itself is non-fatal.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
    query = {
        'params': 'wgYCCAA=',
        'browseId': f'VL{item_id}',
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')

5350

5351

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the YoutubeTabIE 'skip' extractor argument."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped

5354

5355

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and its initial data, retrying where sensible.

    Network errors are retried, except HTTP 403/429 which are reported
    immediately. Alerts found in the data are reported. Data lacking all
    of 'contents', 'currentVideoEndpoint' and 'onResponseReceivedActions'
    is discarded and the download retried.

    Returns a (webpage, data) tuple; either element may be None.
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            is_blocked = isinstance(cause, HTTPError) and cause.status in (403, 429)
            if isinstance(cause, network_exceptions) and not is_blocked:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')
            data = None

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage"""
    # Nothing to report when ytcfg is available or the user is not logged in
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if 'authcheck' not in skipped and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

5395

5396

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the page data and ytcfg for `url`.

    Unless the webpage is skipped, it is downloaded first. If the page
    turns out to be the home feed although it was not requested, an error
    is raised (`fatal`) or a warning is printed. When no data was
    obtained from the webpage, the data is requested through the API.

    Returns a (data, ytcfg) tuple.
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Detect a redirect to the home page that was not explicitly requested
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)

    return data, ytcfg

5414

5415

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' endpoint and fetch
    the response of the browse or watch endpoint it resolves to.

    If neither endpoint is present in the resolve response, raise
    ExtractorError when `fatal` is set, otherwise warn and return None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolve response, API endpoint to query with its params)
    endpoint_map = (('browseEndpoint', 'browse'), ('watchEndpoint', 'next'))
    for ep_key, ep in endpoint_map:
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

5432

5433

# Default value for the 'params' field of search requests; used by
# _search_results when no params are passed and only sent if truthy
_SEARCH_PARAMS = None

5434

5435

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield search result entries for `query`, page after page.

    `params` defaults to `_SEARCH_PARAMS` and is only sent when truthy.
    Paging continues for as long as `_extract_entries` reports a
    continuation.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    payload = {'query': query}
    if params:
        payload['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    response = None
    page_num = 0
    while True:
        page_num += 1
        # Merge the continuation (if any) of the previous page into the request
        payload.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response), default_client=default_client)
        response = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=payload,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(response, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            break

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

5470

IE_DESC = 'YouTube Tabs'

5471

_VALID_URL = r'''(?x:

5472

https?://

5473

(?!consent\.)(?:\w+\.)?

5474

(?:

5475

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

5480

(?P<not_channel>

5481

feed/|hashtag/|

5482

(?:playlist|watch)\?.*?\blist=

5483

)|

5484

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

5489

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

5490

}

5491

IE_NAME = 'youtube:tab'

5492

5493

_TESTS = [{

5494

'note': 'playlists, multipage',

5495

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

5496

'playlist_mincount': 94,

5497

'info_dict': {

5498

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5499

'title': 'Igor Kleiner Ph.D. - Playlists',

5500

'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',

5501

'uploader': 'Igor Kleiner Ph.D.',

5502

'uploader_id': '@IgorDataScience',

5503

'uploader_url': 'https://www.youtube.com/@IgorDataScience',

5504

'channel': 'Igor Kleiner Ph.D.',

5505

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5506

'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],

5507

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5508

'channel_follower_count': int

5509

},

5510

}, {

5511

'note': 'playlists, multipage, different order',

5512

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

5513

'playlist_mincount': 94,

5514

'info_dict': {

5515

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5516

'title': 'Igor Kleiner Ph.D. - Playlists',

5517

'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',

5518

'uploader': 'Igor Kleiner Ph.D.',

5519

'uploader_id': '@IgorDataScience',

5520

'uploader_url': 'https://www.youtube.com/@IgorDataScience',

5521

'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],

5522

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5523

'channel': 'Igor Kleiner Ph.D.',

5524

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5525

'channel_follower_count': int

5526

},

5527

}, {

5528

'note': 'playlists, series',

5529

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

5530

'playlist_mincount': 5,

5531

'info_dict': {

5532

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5533

'title': '3Blue1Brown - Playlists',

5534

'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',

5535

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5536

'channel': '3Blue1Brown',

5537

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5538

'uploader_id': '@3blue1brown',

5539

'uploader_url': 'https://www.youtube.com/@3blue1brown',

5540

'uploader': '3Blue1Brown',

5541

'tags': ['Mathematics'],

5542

'channel_follower_count': int,

5543

'channel_is_verified': True,

5544

},

5545

}, {

5546

'note': 'playlists, singlepage',

5547

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

5548

'playlist_mincount': 4,

5549

'info_dict': {

5550

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5551

'title': 'ThirstForScience - Playlists',

5552

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

5553

'uploader': 'ThirstForScience',

5554

'uploader_url': 'https://www.youtube.com/@ThirstForScience',

5555

'uploader_id': '@ThirstForScience',

5556

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5557

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5558

'tags': 'count:12',

5559

'channel': 'ThirstForScience',

5560

'channel_follower_count': int

5561

}

5562

}, {

5563

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

5564

'only_matching': True,

5565

}, {

5566

'note': 'basic, single video playlist',

5567

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5568

'info_dict': {

5569

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5570

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

5575

'channel': 'Sergey M.',

5576

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5577

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5578

'availability': 'public',

5579

'uploader': 'Sergey M.',

5580

'uploader_url': 'https://www.youtube.com/@sergeym.6173',

5581

'uploader_id': '@sergeym.6173',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

5586

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5587

'info_dict': {

5588

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5589

'title': 'youtube-dl empty playlist',

5590

'tags': [],

5591

'channel': 'Sergey M.',

5592

'description': '',

5593

'modified_date': '20230921',

5594

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5595

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5596

'availability': 'unlisted',

5597

'uploader_url': 'https://www.youtube.com/@sergeym.6173',

5598

'uploader_id': '@sergeym.6173',

5599

'uploader': 'Sergey M.',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

5605

'info_dict': {

5606

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5607

'title': 'lex will - Home',

5608

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5609

'uploader': 'lex will',

5610

'uploader_id': '@lexwill718',

5611

'channel': 'lex will',

5612

'tags': ['bible', 'history', 'prophesy'],

5613

'uploader_url': 'https://www.youtube.com/@lexwill718',

5614

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5615

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5616

'channel_follower_count': int

5617

},

5618

'playlist_mincount': 2,

5619

}, {

5620

'note': 'Videos tab',

5621

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

5622

'info_dict': {

5623

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5624

'title': 'lex will - Videos',

5625

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5626

'uploader': 'lex will',

5627

'uploader_id': '@lexwill718',

5628

'tags': ['bible', 'history', 'prophesy'],

5629

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5630

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5631

'uploader_url': 'https://www.youtube.com/@lexwill718',

5632

'channel': 'lex will',

5633

'channel_follower_count': int

5634

},

5635

'playlist_mincount': 975,

5636

}, {

5637

'note': 'Videos tab, sorted by popular',

5638

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

5639

'info_dict': {

5640

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5641

'title': 'lex will - Videos',

5642

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5643

'uploader': 'lex will',

5644

'uploader_id': '@lexwill718',

5645

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5646

'uploader_url': 'https://www.youtube.com/@lexwill718',

5647

'channel': 'lex will',

5648

'tags': ['bible', 'history', 'prophesy'],

5649

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5650

'channel_follower_count': int

5651

},

5652

'playlist_mincount': 199,

5653

}, {

5654

'note': 'Playlists tab',

5655

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

5656

'info_dict': {

5657

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5658

'title': 'lex will - Playlists',

5659

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5660

'uploader': 'lex will',

5661

'uploader_id': '@lexwill718',

5662

'uploader_url': 'https://www.youtube.com/@lexwill718',

5663

'channel': 'lex will',

5664

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5665

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5666

'tags': ['bible', 'history', 'prophesy'],

5667

'channel_follower_count': int

5668

},

5669

'playlist_mincount': 17,

5670

}, {

5671

'note': 'Community tab',

5672

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

5673

'info_dict': {

5674

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5675

'title': 'lex will - Community',

5676

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5677

'channel': 'lex will',

5678

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5679

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5680

'tags': ['bible', 'history', 'prophesy'],

5681

'channel_follower_count': int,

5682

'uploader_url': 'https://www.youtube.com/@lexwill718',

5683

'uploader_id': '@lexwill718',

5684

'uploader': 'lex will',

5685

},

5686

'playlist_mincount': 18,

5687

}, {

5688

'note': 'Channels tab',

5689

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

5690

'info_dict': {

5691

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5692

'title': 'lex will - Channels',

5693

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5694

'channel': 'lex will',

5695

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5696

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5697

'tags': ['bible', 'history', 'prophesy'],

5698

'channel_follower_count': int,

5699

'uploader_url': 'https://www.youtube.com/@lexwill718',

5700

'uploader_id': '@lexwill718',

5701

'uploader': 'lex will',

5702

},

5703

'playlist_mincount': 12,

5704

}, {

5705

'note': 'Search tab',

5706

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

5707

'playlist_mincount': 40,

5708

'info_dict': {

5709

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5710

'title': '3Blue1Brown - Search - linear algebra',

5711

'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',

5712

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5713

'tags': ['Mathematics'],

5714

'channel': '3Blue1Brown',

5715

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5716

'channel_follower_count': int,

5717

'uploader_url': 'https://www.youtube.com/@3blue1brown',

5718

'uploader_id': '@3blue1brown',

5719

'uploader': '3Blue1Brown',

5720

'channel_is_verified': True,

5721

},

5722

}, {

5723

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5724

'only_matching': True,

5725

}, {

5726

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5727

'only_matching': True,

5728

}, {

5729

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5730

'only_matching': True,

5731

}, {

5732

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

5733

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5734

'info_dict': {

5735

'title': '29C3: Not my department',

5736

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5737

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

5738

'tags': [],

5739

'view_count': int,

5740

'modified_date': '20150605',

5741

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5742

'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',

5743

'channel': 'Christiaan008',

5744

'availability': 'public',

5745

'uploader_id': '@ChRiStIaAn008',

5746

'uploader': 'Christiaan008',

5747

'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',

5748

},

5749

'playlist_count': 96,

5750

}, {

5751

'note': 'Large playlist',

5752

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

5753

'info_dict': {

5754

'title': 'Uploads from Cauchemar',

5755

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

5756

'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',

5757

'tags': [],

5758

'modified_date': r're:\d{8}',

5759

'channel': 'Cauchemar',

5760

'view_count': int,

5761

'description': '',

5762

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5763

'availability': 'public',

5764

'uploader_id': '@Cauchemar89',

5765

'uploader': 'Cauchemar',

5766

'uploader_url': 'https://www.youtube.com/@Cauchemar89',

5767

},

5768

'playlist_mincount': 1123,

5769

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5770

}, {

5771

'note': 'even larger playlist, 8832 videos',

5772

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

5773

'only_matching': True,

5774

}, {

5775

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

5776

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

5777

'info_dict': {

5778

'title': 'Uploads from Interstellar Movie',

5779

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

5780

'tags': [],

5781

'view_count': int,

5782

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5783

'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',

5784

'channel': 'Interstellar Movie',

5785

'description': '',

5786

'modified_date': r're:\d{8}',

5787

'availability': 'public',

5788

'uploader_id': '@InterstellarMovie',

5789

'uploader': 'Interstellar Movie',

5790

'uploader_url': 'https://www.youtube.com/@InterstellarMovie',

5791

},

5792

'playlist_mincount': 21,

5793

}, {

5794

'note': 'Playlist with "show unavailable videos" button',

5795

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

5796

'info_dict': {

5797

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

5798

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

5799

'view_count': int,

5800

'channel': 'Phim Siêu Nhân Nhật Bản',

5801

'tags': [],

5802

'description': '',

5803

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5804

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5805

'modified_date': r're:\d{8}',

5806

'availability': 'public',

5807

'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',

5808

'uploader_id': '@phimsieunhannhatban',

5809

'uploader': 'Phim Siêu Nhân Nhật Bản',

5810

},

5811

'playlist_mincount': 200,

5812

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5813

}, {

5814

'note': 'Playlist with unavailable videos in page 7',

5815

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

5816

'info_dict': {

5817

'title': 'Uploads from BlankTV',

5818

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

5819

'channel': 'BlankTV',

5820

'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',

5821

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5822

'view_count': int,

5823

'tags': [],

5824

'modified_date': r're:\d{8}',

5825

'description': '',

5826

'availability': 'public',

5827

'uploader_id': '@blanktv',

5828

'uploader': 'BlankTV',

5829

'uploader_url': 'https://www.youtube.com/@blanktv',

5830

},

5831

'playlist_mincount': 1000,

5832

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5833

}, {

5834

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

5835

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5836

'info_dict': {

5837

'title': 'Data Analysis with Dr Mike Pound',

5838

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5839

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

5840

'tags': [],

5841

'view_count': int,

5842

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5843

'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',

5844

'channel': 'Computerphile',

5845

'availability': 'public',

5846

'modified_date': '20190712',

5847

'uploader_id': '@Computerphile',

5848

'uploader': 'Computerphile',

5849

'uploader_url': 'https://www.youtube.com/@Computerphile',

5850

},

5851

'playlist_mincount': 11,

5852

}, {

5853

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5854

'only_matching': True,

5855

}, {

5856

'note': 'Playlist URL that does not actually serve a playlist',

5857

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

5862

'upload_date': '20150526',

5863

'license': 'Standard YouTube License',

5864

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

5865

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5872

},

5873

'skip': 'This video is not available.',

5874

'add_ie': [YoutubeIE.ie_key()],

5875

}, {

5876

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5877

'only_matching': True,

5878

}, {

5879

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5880

'only_matching': True,

5881

}, {

5882

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5883

'info_dict': {

5884

'id': 'hGkQjiJLjWQ', # This will keep changing

5885

'ext': 'mp4',

5886

'title': str,

5887

'upload_date': r're:\d{8}',

5888

'description': str,

5889

'categories': ['News & Politics'],

5890

'tags': list,

5891

'like_count': int,

5892

'release_timestamp': int,

5893

'channel': 'Sky News',

5894

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5895

'age_limit': 0,

5896

'view_count': int,

5897

'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',

5898

'playable_in_embed': True,

5899

'release_date': r're:\d+',

5900

'availability': 'public',

5901

'live_status': 'is_live',

5902

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5903

'channel_follower_count': int,

5904

'concurrent_view_count': int,

5905

'uploader_url': 'https://www.youtube.com/@SkyNews',

5906

'uploader_id': '@SkyNews',

5907

'uploader': 'Sky News',

5908

'channel_is_verified': True,

5909

},

5910

'params': {

5911

'skip_download': True,

5912

},

5913

'expected_warnings': ['Ignoring subtitle tracks found in '],

5914

}, {

5915

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5920

'upload_date': '20150715',

5921

'license': 'Standard YouTube License',

5922

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5923

'categories': ['News & Politics'],

5924

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5929

},

5930

'only_matching': True,

5931

}, {

5932

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5933

'only_matching': True,

5934

}, {

5935

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5936

'only_matching': True,

5937

}, {

5938

'note': 'A channel that is not live. Should raise error',

5939

'url': 'https://www.youtube.com/user/numberphile/live',

5940

'only_matching': True,

5941

}, {

5942

'url': 'https://www.youtube.com/feed/trending',

5943

'only_matching': True,

5944

}, {

5945

'url': 'https://www.youtube.com/feed/library',

5946

'only_matching': True,

5947

}, {

5948

'url': 'https://www.youtube.com/feed/history',

5949

'only_matching': True,

5950

}, {

5951

'url': 'https://www.youtube.com/feed/subscriptions',

5952

'only_matching': True,

5953

}, {

5954

'url': 'https://www.youtube.com/feed/watch_later',

5955

'only_matching': True,

5956

}, {

5957

'note': 'Recommended - redirects to home page.',

5958

'url': 'https://www.youtube.com/feed/recommended',

5959

'only_matching': True,

5960

}, {

5961

'note': 'inline playlist with not always working continuations',

5962

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5963

'only_matching': True,

5964

}, {

5965

'url': 'https://www.youtube.com/course',

5966

'only_matching': True,

5967

}, {

5968

'url': 'https://www.youtube.com/zsecurity',

5969

'only_matching': True,

5970

}, {

5971

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5972

'only_matching': True,

5973

}, {

5974

'url': 'https://www.youtube.com/TheYoungTurks/live',

5975

'only_matching': True,

5976

}, {

5977

'url': 'https://www.youtube.com/hashtag/cctv9',

5978

'info_dict': {

5979

'id': 'cctv9',

5980

'title': 'cctv9 - All',

5981

'tags': [],

5982

},

5983

'playlist_mincount': 300, # not consistent but should be over 300

5984

}, {

5985

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5986

'only_matching': True,

5987

}, {

5988

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5989

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5990

'only_matching': True

5991

}, {

5992

'note': '/browse/ should redirect to /channel/',

5993

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5994

'only_matching': True

5995

}, {

5996

'note': 'VLPL, should redirect to playlist?list=PL...',

5997

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5998

'info_dict': {

5999

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

6000

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

6001

'title': 'NCS : All Releases 💿',

6002

'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',

6003

'modified_date': r're:\d{8}',

6004

'view_count': int,

6005

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

6006

'tags': [],

6007

'channel': 'NoCopyrightSounds',

6008

'availability': 'public',

6009

'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',

6010

'uploader': 'NoCopyrightSounds',

6011

'uploader_id': '@NoCopyrightSounds',

6012

},

6013

'playlist_mincount': 166,

6014

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],

6015

}, {

6016

# TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos

6017

'note': 'Topic, should redirect to playlist?list=UU...',

6018

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

6019

'info_dict': {

6020

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

6021

'title': 'Uploads from Royalty Free Music - Topic',

6022

'tags': [],

6023

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

6024

'channel': 'Royalty Free Music - Topic',

6025

'view_count': int,

6026

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

6027

'modified_date': r're:\d{8}',

6028

'description': '',

6029

'availability': 'public',

6030

'uploader': 'Royalty Free Music - Topic',

6031

},

6032

'playlist_mincount': 101,

6033

'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],

6034

}, {

6035

# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)

6036

# Treat as a general feed

6037

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

6038

'info_dict': {

6039

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

6040

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

6041

'tags': [],

6042

},

6043

'playlist_mincount': 9,

6044

}, {

6045

'note': 'Youtube music Album',

6046

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

6047

'info_dict': {

6048

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

6049

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

6054

'modified_date': r're:\d{8}',

6055

},

6056

'playlist_count': 50,

6057

'expected_warnings': ['YouTube Music is not directly supported'],

6058

}, {

6059

'note': 'unlisted single video playlist',

6060

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

6061

'info_dict': {

6062

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

6063

'title': 'yt-dlp unlisted playlist test',

6064

'availability': 'unlisted',

6065

'tags': [],

6066

'modified_date': '20220418',

6067

'channel': 'colethedj',

6068

'view_count': int,

6069

'description': '',

6070

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

6071

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

6072

'uploader_url': 'https://www.youtube.com/@colethedj1894',

6073

'uploader_id': '@colethedj1894',

6074

'uploader': 'colethedj',

},

'playlist': [{

'info_dict': {

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

'id': 'BaW_jenozKc',

'_type': 'url',

'ie_key': 'Youtube',

'duration': 10,

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

6084

'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

6085

'view_count': int,

6086

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',

6087

'channel': 'Philipp Hagemeister',

6088

'uploader_id': '@PhilippHagemeister',

6089

'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',

6090

'uploader': 'Philipp Hagemeister',

}

}],

'playlist_count': 1,

'params': {'extract_flat': True},

6095

}, {

6096

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

6097

'url': 'https://www.youtube.com/feed/recommended',

6098

'info_dict': {

6099

'id': 'recommended',

6100

'title': 'recommended',

6101

'tags': [],

6102

},

6103

'playlist_mincount': 50,

6104

'params': {

6105

'skip_download': True,

6106

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

6107

},

6108

}, {

6109

'note': 'API Fallback: /videos tab, sorted by oldest first',

6110

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

6111

'info_dict': {

6112

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

6113

'title': 'Cody\'sLab - Videos',

6114

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

6115

'channel': 'Cody\'sLab',

6116

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

6117

'tags': [],

6118

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

6119

'channel_follower_count': int

6120

},

6121

'playlist_mincount': 650,

6122

'params': {

6123

'skip_download': True,

6124

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

6125

},

6126

'skip': 'Query for sorting no longer works',

6127

}, {

6128

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

6129

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

6130

'info_dict': {

6131

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

6132

'title': 'Uploads from Royalty Free Music - Topic',

6133

'modified_date': r're:\d{8}',

6134

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

6135

'description': '',

6136

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

6137

'tags': [],

6138

'channel': 'Royalty Free Music - Topic',

6139

'view_count': int,

6140

'availability': 'public',

6141

'uploader': 'Royalty Free Music - Topic',

6142

},

6143

'playlist_mincount': 101,

6144

'params': {

6145

'skip_download': True,

6146

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

6147

},

6148

'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],

6149

}, {

6150

'note': 'non-standard redirect to regional channel',

6151

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

6152

'only_matching': True

6153

}, {

6154

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

6155

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

6156

'info_dict': {

6157

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

6158

'modified_date': '20220407',

6159

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

6160

'tags': [],

6161

'availability': 'unlisted',

6162

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

6163

'channel': 'pukkandan',

6164

'description': 'Test for collaborative playlist',

6165

'title': 'yt-dlp test - collaborative playlist',

6166

'view_count': int,

6167

'uploader_url': 'https://www.youtube.com/@pukkandan',

6168

'uploader_id': '@pukkandan',

6169

'uploader': 'pukkandan',

6170

},

6171

'playlist_mincount': 2

6172

}, {

6173

'note': 'translated tab name',

6174

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',

6175

'info_dict': {

6176

'id': 'UCiu-3thuViMebBjw_5nWYrA',

6177

'tags': [],

6178

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

6179

'description': 'test description',

6180

'title': 'cole-dlp-test-acc - 再生リスト',

6181

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6182

'channel': 'cole-dlp-test-acc',

6183

'uploader_url': 'https://www.youtube.com/@coletdjnz',

6184

'uploader_id': '@coletdjnz',

6185

'uploader': 'cole-dlp-test-acc',

6186

},

6187

'playlist_mincount': 1,

6188

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

6189

'expected_warnings': ['Preferring "ja"'],

6190

}, {

6191

# XXX: this should really check flat playlist entries, but the test suite doesn't support that

6192

'note': 'preferred lang set with playlist with translated video titles',

6193

'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

6194

'info_dict': {

6195

'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

6196

'tags': [],

6197

'view_count': int,

6198

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

6199

'channel': 'cole-dlp-test-acc',

6200

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6201

'description': 'test',

6202

'title': 'dlp test playlist',

6203

'availability': 'public',

6204

'uploader_url': 'https://www.youtube.com/@coletdjnz',

6205

'uploader_id': '@coletdjnz',

6206

'uploader': 'cole-dlp-test-acc',

6207

},

6208

'playlist_mincount': 1,

6209

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

6210

'expected_warnings': ['Preferring "ja"'],

6211

}, {

6212

# shorts audio pivot for 2GtVksBMYFM.

6213

'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',

6214

'info_dict': {

6215

'id': 'sfv_audio_pivot',

6216

'title': 'sfv_audio_pivot',

6217

'tags': [],

6218

},

6219

'playlist_mincount': 50,

6220

6221

}, {

6222

# Channel with a real live tab (not to be mistaken with streams tab)

6223

# Do not treat like it should redirect to live stream

6224

'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',

6225

'info_dict': {

6226

'id': 'UCEH7P7kyJIkS_gJf93VYbmg',

6227

'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',

6228

'tags': [],

6229

},

6230

'playlist_mincount': 20,

6231

}, {

6232

# Tab name is not the same as tab id

6233

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',

6234

'info_dict': {

6235

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

6236

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',

6237

'tags': [],

6238

},

6239

'playlist_mincount': 8,

6240

}, {

6241

# Home tab id is literally home. Not to get mistaken with featured

6242

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',

6243

'info_dict': {

6244

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

6245

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',

6246

'tags': [],

6247

},

6248

'playlist_mincount': 8,

6249

}, {

6250

# Should get three playlists for videos, shorts and streams tabs

6251

'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

6252

'info_dict': {

6253

'id': 'UCK9V2B22uJYu3N7eR_BT9QA',

6254

'title': 'Polka Ch. 尾丸ポルカ',

6255

'channel_follower_count': int,

6256

'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',

6257

'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

6258

'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',

6259

'channel': 'Polka Ch. 尾丸ポルカ',

6260

'tags': 'count:35',

6261

'uploader_url': 'https://www.youtube.com/@OmaruPolka',

6262

'uploader': 'Polka Ch. 尾丸ポルカ',

6263

'uploader_id': '@OmaruPolka',

6264

'channel_is_verified': True,

},

'playlist_count': 3,

}, {

# Shorts tab with channel with handle

6269

# TODO: fix channel description

6270

'url': 'https://www.youtube.com/@NotJustBikes/shorts',

6271

'info_dict': {

6272

'id': 'UC0intLFzLaudFG-xAvUEO-A',

6273

'title': 'Not Just Bikes - Shorts',

6274

'tags': 'count:10',

6275

'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',

6276

'description': 'md5:5e82545b3a041345927a92d0585df247',

6277

'channel_follower_count': int,

6278

'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',

6279

'channel': 'Not Just Bikes',

6280

'uploader_url': 'https://www.youtube.com/@NotJustBikes',

6281

'uploader': 'Not Just Bikes',

6282

'uploader_id': '@NotJustBikes',

6283

'channel_is_verified': True,

6284

},

6285

'playlist_mincount': 10,

6286

}, {

6287

# Streams tab

6288

'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',

6289

'info_dict': {

6290

'id': 'UC3eYAvjCVwNHgkaGbXX3sig',

6291

'title': '中村悠一 - Live',

6292

'tags': 'count:7',

6293

'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',

6294

'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',

6295

'channel': '中村悠一',

6296

'channel_follower_count': int,

6297

'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',

6298

'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',

6299

'uploader_id': '@Yuichi-Nakamura',

6300

'uploader': '中村悠一',

6301

},

6302

'playlist_mincount': 60,

6303

}, {

6304

# Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.

6305

# See test_youtube_lists

6306

'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',

6307

'only_matching': True,

6308

}, {

6309

# No uploads and no UCID given. Should fail with no uploads error

6310

# See test_youtube_lists

6311

'url': 'https://www.youtube.com/news',

6312

'only_matching': True

6313

}, {

6314

# No videos tab but has a shorts tab

6315

'url': 'https://www.youtube.com/c/TKFShorts',

6316

'info_dict': {

6317

'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

6318

'title': 'Shorts Break - Shorts',

6319

'tags': 'count:48',

6320

'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

6321

'channel': 'Shorts Break',

6322

'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',

6323

'channel_follower_count': int,

6324

'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',

6325

'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',

6326

'uploader': 'Shorts Break',

6327

'uploader_id': '@ShortsBreak_Official',

6328

},

6329

'playlist_mincount': 30,

6330

}, {

6331

# Trending Now Tab. tab id is empty

6332

'url': 'https://www.youtube.com/feed/trending',

6333

'info_dict': {

6334

'id': 'trending',

6335

'title': 'trending - Now',

6336

'tags': [],

6337

},

6338

'playlist_mincount': 30,

6339

}, {

6340

# Trending Gaming Tab. tab id is empty

6341

'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',

6342

'info_dict': {

6343

'id': 'trending',

6344

'title': 'trending - Gaming',

6345

'tags': [],

6346

},

6347

'playlist_mincount': 30,

6348

}, {

6349

# Shorts url result in shorts tab

6350

# TODO: Fix channel id extraction

6351

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',

6352

'info_dict': {

6353

'id': 'UCiu-3thuViMebBjw_5nWYrA',

6354

'title': 'cole-dlp-test-acc - Shorts',

6355

'channel': 'cole-dlp-test-acc',

6356

'description': 'test description',

6357

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6358

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

6359

'tags': [],

6360

'uploader_url': 'https://www.youtube.com/@coletdjnz',

6361

'uploader_id': '@coletdjnz',

6362

'uploader': 'cole-dlp-test-acc',

},

'playlist': [{

'info_dict': {

# Channel data is not currently available for short renderers (as of 2023-03-01)

6367

'_type': 'url',

6368

'ie_key': 'Youtube',

6369

'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',

6370

'id': 'sSM9J5YH_60',

6371

'title': 'SHORT short',

'view_count': int,

'thumbnails': list,

}

}],

'params': {'extract_flat': True},

6377

}, {

6378

# Live video status should be extracted

6379

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',

6380

'info_dict': {

6381

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

6382

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live

'tags': []

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'Youtube',

'url': 'startswith:https://www.youtube.com/watch?v=',

6390

'id': str,

6391

'title': str,

6392

'live_status': 'is_live',

6393

'channel_id': str,

6394

'channel_url': str,

6395

'concurrent_view_count': int,

'channel': str,

'uploader': str,

'uploader_url': str,

'uploader_id': str,

'channel_is_verified': bool, # this will keep changing

6401

}

6402

}],

6403

'params': {'extract_flat': True, 'playlist_items': '1'},

6404

'playlist_mincount': 1

6405

}, {

6406

# Channel renderer metadata. Contains number of videos on the channel

6407

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',

6408

'info_dict': {

6409

'id': 'UCiu-3thuViMebBjw_5nWYrA',

6410

'title': 'cole-dlp-test-acc - Channels',

6411

'channel': 'cole-dlp-test-acc',

6412

'description': 'test description',

6413

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6414

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

6415

'tags': [],

6416

'uploader_url': 'https://www.youtube.com/@coletdjnz',

6417

'uploader_id': '@coletdjnz',

6418

'uploader': 'cole-dlp-test-acc',

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'YoutubeTab',

6424

'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6425

'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6426

'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6427

'title': 'PewDiePie',

6428

'channel': 'PewDiePie',

6429

'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6430

'thumbnails': list,

6431

'channel_follower_count': int,

6432

'playlist_count': int,

6433

'uploader': 'PewDiePie',

6434

'uploader_url': 'https://www.youtube.com/@PewDiePie',

6435

'uploader_id': '@PewDiePie',

6436

'channel_is_verified': True,

6437

}

6438

}],

6439

'params': {'extract_flat': True},

6440

}, {

6441

'url': 'https://www.youtube.com/@3blue1brown/about',

6442

'info_dict': {

6443

'id': '@3blue1brown',

6444

'tags': ['Mathematics'],

6445

'title': '3Blue1Brown',

6446

'channel_follower_count': int,

6447

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

6448

'channel': '3Blue1Brown',

6449

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

6450

'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',

6451

'uploader_url': 'https://www.youtube.com/@3blue1brown',

6452

'uploader_id': '@3blue1brown',

6453

'uploader': '3Blue1Brown',

6454

'channel_is_verified': True,

},

'playlist_count': 0,

}, {

# Podcasts tab, with rich entry playlistRenderers

6459

'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',

6460

'info_dict': {

6461

'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',

6462

'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',

6463

'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',

6464

'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',

6465

'title': '99 Percent Invisible - Podcasts',

6466

'uploader': '99 Percent Invisible',

6467

'channel_follower_count': int,

6468

'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',

6469

'tags': [],

6470

'channel': '99 Percent Invisible',

6471

'uploader_id': '@99percentinvisiblepodcast',

},

'playlist_count': 0,

}, {

# Releases tab, with rich entry playlistRenderers (same as Podcasts tab)

6476

'url': 'https://www.youtube.com/@AHimitsu/releases',

6477

'info_dict': {

6478

'id': 'UCgFwu-j5-xNJml2FtTrrB3A',

6479

'channel': 'A Himitsu',

6480

'uploader_url': 'https://www.youtube.com/@AHimitsu',

6481

'title': 'A Himitsu - Releases',

6482

'uploader_id': '@AHimitsu',

6483

'uploader': 'A Himitsu',

6484

'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',

6485

'tags': 'count:12',

6486

'description': 'I make music',

6487

'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',

6488

'channel_follower_count': int,

6489

'channel_is_verified': True,

6490

},

6491

'playlist_mincount': 10,

6492

}, {

6493

# Playlist with only shorts, shown as reel renderers

6494

# FIXME: future: YouTube currently doesn't give continuation for this,

6495

# may do in future.

6496

'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',

6497

'info_dict': {

6498

'id': 'UUxqPAgubo4coVn9Lx1FuKcg',

6499

'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',

6500

'view_count': int,

6501

'uploader_id': '@BangyShorts',

6502

'description': '',

6503

'uploader_url': 'https://www.youtube.com/@BangyShorts',

6504

'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',

6505

'channel': 'Bangy Shorts',

6506

'uploader': 'Bangy Shorts',

6507

'tags': [],

6508

'availability': 'public',

6509

'modified_date': r're:\d{8}',

6510

'title': 'Uploads from Bangy Shorts',

6511

},

6512

'playlist_mincount': 100,

6513

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

6514

}, {

6515

'note': 'Tags containing spaces',

6516

'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',

6517

'playlist_count': 3,

6518

'info_dict': {

6519

'id': 'UC7_YxT-KID8kRbqZo7MyscQ',

6520

'channel': 'Markiplier',

6521

'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ',

6522

'title': 'Markiplier',

6523

'channel_follower_count': int,

6524

'description': 'md5:0c010910558658824402809750dc5d97',

6525

'uploader_id': '@markiplier',

6526

'uploader_url': 'https://www.youtube.com/@markiplier',

6527

'uploader': 'Markiplier',

6528

'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',

6529

'channel_is_verified': True,

6530

'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',

6531

'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',

6532

'challenges', 'sketches', 'scary games', 'funny games', 'rage games',

'mark fischbach'],

},

}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    URLs that ``YoutubeIE`` accepts are always rejected here; every other
    URL is judged by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

6540

6541

# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional single path segment ("/name"); it is only attempted
#          when the `not_channel` group (referenced from _VALID_URL) did not match
#   post - everything that follows (query string, fragment, extra path)
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')

6542

6543

def _get_url_mobj(self, url):

6544

mobj = self._URL_RE.match(url).groupdict()

6545

mobj.update((k, '') for k, v in mobj.items() if v is None)

6546

return mobj

6547

6548

def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a ``(tab_id, tab_name)`` tuple for a tab renderer dict.

    The id is taken from the tab's endpoint URL (its ``tab`` path segment,
    resolved against ``base_url``) or, failing that, from ``tabIdentifier``.
    When neither yields an id, the lower-cased tab title is used instead.
    """
    tab_name = (tab.get('title') or '').lower()
    endpoint_url = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_url)

    # The `tab` group carries a leading slash, hence the [1:]
    tab_id = tab_url and self._get_url_mobj(tab_url)['tab'][1:]
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)
    if tab_id:
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(tab_name, tab_name), tab_name

6569

6570

def _has_tab(self, tabs, tab_id):

6571

return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)

6572

6573

def _empty_playlist(self, item_id, data):

6574

return self.playlist_result([], item_id, **self._extract_metadata_from_tabs(item_id, data))

6575

6576

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a tab/playlist page, or redirect to the extractor that should handle it.

    Rough order of operations:
      1. Force the host to www.youtube.com and split the URL into pre/tab/post.
      2. Redirect YouTube Music channel URLs (VL/MP ids, /browse/) to their equivalents.
      3. Handle watch URLs that lack a video id and honour the playlist/video choice.
      4. Download the page data and follow a conditional (regional) redirect if present.
      5. For channels, work out which tab was actually selected and which extra
         tabs (streams/shorts) should be extracted as well.
      6. Return the tab entries, an inline playlist, or a single video.

    Raises ExtractorError when the page cannot be recognised, and UserNotLive
    when a /live URL did not resolve to a live tab.
    """
    item_id = self._match_id(url)
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    # _get_url_mobj yields '' for groups that did not match, so an empty `not_channel` means a channel URL
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` includes its leading slash; strip it for the id but keep it for display
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # A bare channel URL is fetched through its /videos tab
        url = f'{pre}/videos{post}'
    if smuggled_data.get('is_music_url'):
        self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Re-append the requested tab and trailing part to the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        # /about is no longer a tab
        if original_tab_id == 'about':
            return self._empty_playlist(item_id, data)

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    return self._empty_playlist(item_id, data)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    # `data` still holds the channel page here since the assignment above failed
                    return self._empty_playlist(item_id, data)
                else:
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced (or cleared) above, so the tab renderers are extracted again
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: fall back to a single video, preferring the id reported by the page
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')

6731

6732

6733

class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids as well as any YouTube/Invidious URL carrying
    # a `list=` query parameter, and hands them over to YoutubeTabIE.
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': '@WickmanVT',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/@WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': '@milan5503',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/@milan5503',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': '@music_king',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@music_king',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle ``url``.

        URLs claimed by YoutubeTabIE, and URLs that carry a non-empty ``v``
        query parameter, are rejected; everything else is judged by the
        inherited ``_VALID_URL`` matching.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # `parse_qs` is already imported at module level; no local import is needed
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite ``url`` as a canonical playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be determined before `url` is rewritten to www.youtube.com below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; a bare id has none, so build it
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6845

6846

6847

class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that also carry a playlist (`list=`) parameter
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': r're:^https?://.*\.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6896

6897

6898

class YoutubeLivestreamEmbedIE(InfoExtractor):
    # Maps /embed/live_stream?channel=<id> URLs onto the channel's /live page
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /live URL of the embedded channel."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

6911

6912

6913

class YoutubeYtUserIE(InfoExtractor):
    # Resolves the "ytuser:<name>" shorthand to the matching /user/ URL
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /user/ URL built from the given name."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)

6925

6926

6927

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Keyword extractor: ":ytfav" and its spelling variants map to the "LL" playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the fixed "LL" playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

6944

6945

6946

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    # Keyword extractor: ":ytnotif" lists the entries of the notification menu
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in ``response``.

        ``continuation_list`` is a one-element list used as an output slot:
        it is reset to ``None`` and then set to the last continuation
        renderer seen while iterating, if any.
        """
        # The first page is a popup menu; later pages are appended continuation items
        items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list)
        continuation_list[0] = None
        for item in items or []:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert a notification renderer into a ``url`` result dict.

        Returns ``None`` when the notification points neither to a video nor
        to a channel post.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The time text is only parsed when the `approximate_date` extractor arg is set
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'uploader': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is left."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            # Consuming the generator fills continuation_list[0] for the next round
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a playlist whose id and title are 'notifications'."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)

7036

7037

7038

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearchN:<query>" keyword search; only class-level configuration lives here
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAfABAQ=='  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'note': 'Suicide/self-harm search warning',
        'url': 'ytsearch1:i hate myself and i wanna die',
        'playlist_count': 1,
        'info_dict': {
            'id': 'i hate myself and i wanna die',
            'title': 'i hate myself and i wanna die',
        },
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearchdateN:<query>" keyword search; only class-level configuration lives here
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB8AEB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles /results and /search URLs; the query comes from `search_query`
    # or `q`, and the optional `sp` parameter is passed on as search params.
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                # No longer available for search as it is set to the handle.
                # 'playlist_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
                'uploader_id': '@kurzgesagt',
                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                'uploader': 'Kurzgesagt – In a Nutshell',
                'channel_is_verified': True,
                'channel_follower_count': int,
            },
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search described by ``url`` and return it as a playlist named after the query."""
        qs = parse_qs(url)
        # `search_query` wins over `q` when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)

7145

7146

7147

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles music.youtube.com/search URLs. A result section can be chosen
    # either through the `sp` parameter or through the URL fragment (e.g. #songs).
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as written in the URL fragment) -> `sp` search params
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run the music search described by ``url``.

        The playlist id/title is the query, suffixed with `` - <section>``
        when a section could be determined.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Reverse lookup of the section name; unknown params are shown verbatim
            for name, value in self._SECTIONS.items():
                if value == params:
                    section = name
                    break
            else:
                section = params
        else:
            # No `sp`: use the text between the first and second '#', if any
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)

7198

7199

7200

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    Each subclass is expected to override _FEED_NAME.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        """Run the shared YouTube login-requirement check for this extractor."""
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        feed_name = self._FEED_NAME
        return f'youtube:{feed_name}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /feed/ URL of this feed."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

7218

7219

7220

class YoutubeWatchLaterIE(InfoExtractor):
    # Keyword extractor: ":ytwatchlater" maps to the "WL" playlist
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the fixed "WL" playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

7232

7233

7234

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home page as well as the ":ytrec" keyword.
    # Unlike the base class, this feed does not require login.
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Keyword extractor: ":ytsubs" and its variants map to the subscriptions feed
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Keyword extractor: ":ythis" / ":ythistory" map to the history feed
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeShortsAudioPivotIE(InfoExtractor):
    # Maps /source/<video id>/shorts URLs onto the sfv_audio_pivot feed
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    IE_NAME = 'youtube:shorts:pivot:audio'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the sfv_audio_pivot browse params for ``video_id``.

        The encoded id is inserted three times into a fixed byte template,
        which is then base64-encoded and URL-quoted.
        """
        raw_id = video_id.encode()
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (raw_id, raw_id, raw_id)
        encoded = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(encoded)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the audio-pivot feed URL for the video."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)

class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that end right after a non-video query
    # parameter. Such URLs contain no video id, so extraction always fails
    # with a hint about shell quoting.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The hint names this program (yt-dlp) so the suggested command can be run as written
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """
    Extractor for ``youtube.com/clip/<id>`` URLs.

    A clip is resolved to the watch URL of its underlying video and handed
    to YoutubeIE as a ``url_transparent`` result, with the clip id and the
    clip's start/end offsets (in seconds) layered on top.
    """
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': '@ScottTheWoz',
            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
            'chapters': 'count:20',
            'comment_count': int,
            'heatmap': 'count:100',
        }
    }]

    def _real_extract(self, url):
        """
        Resolve a clip URL to its base video plus the clip's time range.

        Returns:
            A ``url_transparent`` info dict pointing at the base video's
            watch URL, carrying the clip id and ``section_start`` /
            ``section_end`` converted from milliseconds to seconds.

        Raises:
            ExtractorError: if the base video id or the clip's start/end
                times cannot be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # clip_data is None when the loopCommand path is absent; parse
        # defensively so that case (or a missing/malformed field) is reported
        # as an extractor error instead of a bare TypeError/KeyError/ValueError.
        start_ms = int_or_none(traverse_obj(clip_data, 'startTimeMs'))
        end_ms = int_or_none(traverse_obj(clip_data, 'endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start/end time')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }

class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    """
    Extractor for ``consent.youtube.com/m?...`` cookie-consent redirect pages.

    The real destination is read from the ``continue`` query parameter and
    extraction is handed over to whichever extractor matches that URL.
    """
    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': '@sana_natori',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'https://www.youtube.com/@sana_natori',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
            'channel_is_verified': True,
            'heatmap': 'count:100',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """
        Follow the ``continue`` parameter of a consent redirect URL.

        Raises:
            ExtractorError: (expected) if no usable ``continue`` URL is present.
        """
        # When the parameter is repeated, the last occurrence is used
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)

7453

7454

7455

class YoutubeTruncatedIDIE(InfoExtractor):
    """
    Catch-all for watch URLs whose ``v=`` value is only 1-10 characters long.

    Such URLs are reported as truncated; extraction always fails with an
    expected error naming the incomplete id.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """
        Always raise: the matched id is too short to identify a video.

        Raises:
            ExtractorError: with expected=True, quoting the id and the URL.
        """
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)