jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import collections
	4	import copy
	5	import datetime
	6	import enum
	7	import hashlib
	8	import itertools
	9	import json
	10	import math
	11	import os.path
	12	import random
	13	import re
	14	import sys
	15	import threading
	16	import time
	17	import traceback
	18	import urllib.parse
	19
	20	from .common import InfoExtractor, SearchInfoExtractor
	21	from .openload import PhantomJSwrapper
	22	from ..compat import functools
	23	from ..jsinterp import JSInterpreter
	24	from ..networking.exceptions import HTTPError, network_exceptions
	25	from ..utils import (
	26	NO_DEFAULT,
	27	ExtractorError,
	28	LazyList,
	29	UserNotLive,
	30	bug_reports_message,
	31	classproperty,
	32	clean_html,
	33	datetime_from_str,
	34	dict_get,
	35	filter_dict,
	36	float_or_none,
	37	format_field,
	38	get_first,
	39	int_or_none,
	40	is_html,
	41	join_nonempty,
	42	js_to_json,
	43	mimetype2ext,
	44	orderedSet,
	45	parse_codecs,
	46	parse_count,
	47	parse_duration,
	48	parse_iso8601,
	49	parse_qs,
	50	qualities,
	51	remove_start,
	52	smuggle_url,
	53	str_or_none,
	54	str_to_int,
	55	strftime_or_none,
	56	traverse_obj,
	57	try_get,
	58	unescapeHTML,
	59	unified_strdate,
	60	unified_timestamp,
	61	unsmuggle_url,
	62	update_url_query,
	63	url_or_none,
	64	urljoin,
	65	variadic,
	66	)
	67
	68	STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
	69	# any clients starting with _ cannot be explicitly requested by the user
	70	INNERTUBE_CLIENTS = {
	71	'web': {
	72	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	73	'INNERTUBE_CONTEXT': {
	74	'client': {
	75	'clientName': 'WEB',
	76	'clientVersion': '2.20220801.00.00',
	77	}
	78	},
	79	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	80	},
	81	'web_embedded': {
	82	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	83	'INNERTUBE_CONTEXT': {
	84	'client': {
	85	'clientName': 'WEB_EMBEDDED_PLAYER',
	86	'clientVersion': '1.20220731.00.00',
	87	},
	88	},
	89	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	90	},
	91	'web_music': {
	92	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	93	'INNERTUBE_HOST': 'music.youtube.com',
	94	'INNERTUBE_CONTEXT': {
	95	'client': {
	96	'clientName': 'WEB_REMIX',
	97	'clientVersion': '1.20220727.01.00',
	98	}
	99	},
	100	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	101	},
	102	'web_creator': {
	103	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	104	'INNERTUBE_CONTEXT': {
	105	'client': {
	106	'clientName': 'WEB_CREATOR',
	107	'clientVersion': '1.20220726.00.00',
	108	}
	109	},
	110	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	111	},
	112	'android': {
	113	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	114	'INNERTUBE_CONTEXT': {
	115	'client': {
	116	'clientName': 'ANDROID',
	117	'clientVersion': '17.31.35',
	118	'androidSdkVersion': 30,
	119	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	120	}
	121	},
	122	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	123	'REQUIRE_JS_PLAYER': False
	124	},
	125	'android_embedded': {
	126	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	127	'INNERTUBE_CONTEXT': {
	128	'client': {
	129	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	130	'clientVersion': '17.31.35',
	131	'androidSdkVersion': 30,
	132	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	133	},
	134	},
	135	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	136	'REQUIRE_JS_PLAYER': False
	137	},
	138	'android_music': {
	139	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	140	'INNERTUBE_CONTEXT': {
	141	'client': {
	142	'clientName': 'ANDROID_MUSIC',
	143	'clientVersion': '5.16.51',
	144	'androidSdkVersion': 30,
	145	'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
	146	}
	147	},
	148	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	149	'REQUIRE_JS_PLAYER': False
	150	},
	151	'android_creator': {
	152	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	153	'INNERTUBE_CONTEXT': {
	154	'client': {
	155	'clientName': 'ANDROID_CREATOR',
	156	'clientVersion': '22.30.100',
	157	'androidSdkVersion': 30,
	158	'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
	159	},
	160	},
	161	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	162	'REQUIRE_JS_PLAYER': False
	163	},
	164	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	165	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	166	'ios': {
	167	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	168	'INNERTUBE_CONTEXT': {
	169	'client': {
	170	'clientName': 'IOS',
	171	'clientVersion': '17.33.2',
	172	'deviceModel': 'iPhone14,3',
	173	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	174	}
	175	},
	176	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	177	'REQUIRE_JS_PLAYER': False
	178	},
	179	'ios_embedded': {
	180	'INNERTUBE_CONTEXT': {
	181	'client': {
	182	'clientName': 'IOS_MESSAGES_EXTENSION',
	183	'clientVersion': '17.33.2',
	184	'deviceModel': 'iPhone14,3',
	185	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	186	},
	187	},
	188	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	189	'REQUIRE_JS_PLAYER': False
	190	},
	191	'ios_music': {
	192	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	193	'INNERTUBE_CONTEXT': {
	194	'client': {
	195	'clientName': 'IOS_MUSIC',
	196	'clientVersion': '5.21',
	197	'deviceModel': 'iPhone14,3',
	198	'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	199	},
	200	},
	201	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	202	'REQUIRE_JS_PLAYER': False
	203	},
	204	'ios_creator': {
	205	'INNERTUBE_CONTEXT': {
	206	'client': {
	207	'clientName': 'IOS_CREATOR',
	208	'clientVersion': '22.33.101',
	209	'deviceModel': 'iPhone14,3',
	210	'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	211	},
	212	},
	213	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	214	'REQUIRE_JS_PLAYER': False
	215	},
	216	# mweb has 'ultralow' formats
	217	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	218	'mweb': {
	219	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	220	'INNERTUBE_CONTEXT': {
	221	'client': {
	222	'clientName': 'MWEB',
	223	'clientVersion': '2.20220801.00.00',
	224	}
	225	},
	226	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	227	},
	228	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	229	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	230	'tv_embedded': {
	231	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	232	'INNERTUBE_CONTEXT': {
	233	'client': {
	234	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	235	'clientVersion': '2.0',
	236	},
	237	},
	238	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	239	},
	240	}
	241
	242
	243	def _split_innertube_client(client_name):
	244	variant, *base = client_name.rsplit('.', 1)
	245	if base:
	246	return variant, base[0], variant
	247	base, *variant = client_name.split('_', 1)
	248	return client_name, base, variant[0] if variant else None
	249
	250
	251	def short_client_name(client_name):
	252	main, *parts = _split_innertube_client(client_name)[0].replace('embedscreen', 'e_s').split('_')
	253	return join_nonempty(main[:4], ''.join(x[0] for x in parts)).upper()
	254
	255
	256	def build_innertube_clients():
	257	THIRD_PARTY = {
	258	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	259	}
	260	BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb')
	261	priority = qualities(BASE_CLIENTS[::-1])
	262
	263	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	264	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	265	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	266	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	267	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	268
	269	_, base_client, variant = _split_innertube_client(client)
	270	ytcfg['priority'] = 10 * priority(base_client)
	271
	272	if not variant:
	273	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	274	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	275	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	276	embedscreen['priority'] -= 3
	277	elif variant == 'embedded':
	278	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	279	ytcfg['priority'] -= 2
	280	else:
	281	ytcfg['priority'] -= 3
	282
	283
	284	build_innertube_clients()
	285
	286
	287	class BadgeType(enum.Enum):
	288	AVAILABILITY_UNLISTED = enum.auto()
	289	AVAILABILITY_PRIVATE = enum.auto()
	290	AVAILABILITY_PUBLIC = enum.auto()
	291	AVAILABILITY_PREMIUM = enum.auto()
	292	AVAILABILITY_SUBSCRIPTION = enum.auto()
	293	LIVE_NOW = enum.auto()
	294	VERIFIED = enum.auto()
	295
	296
	297	class YoutubeBaseInfoExtractor(InfoExtractor):
	298	"""Provide base functions for Youtube extractors"""
	299
	300	_RESERVED_NAMES = (
	301	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|live\|watch_popup\|clip\|'
	302	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	303	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|source\|'
	304	r'storefront\|oops\|index\|account\|t/terms\|about\|upload\|signin\|logout')
	305
	306	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	307
	308	# _NETRC_MACHINE = 'youtube'
	309
	310	# If True it will raise an error if no login info is provided
	311	_LOGIN_REQUIRED = False
	312
	313	_INVIDIOUS_SITES = (
	314	# invidious-redirect websites
	315	r'(?:www\.)?redirect\.invidious\.io',
	316	r'(?:(?:www\|dev)\.)?invidio\.us',
	317	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	318	r'(?:www\.)?invidious\.pussthecat\.org',
	319	r'(?:www\.)?invidious\.zee\.li',
	320	r'(?:www\.)?invidious\.ethibox\.fr',
	321	r'(?:www\.)?iv\.ggtyler\.dev',
	322	r'(?:www\.)?inv\.vern\.i2p',
	323	r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
	324	r'(?:www\.)?inv\.riverside\.rocks',
	325	r'(?:www\.)?invidious\.silur\.me',
	326	r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
	327	r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
	328	r'(?:www\.)?invidious\.slipfox\.xyz',
	329	r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
	330	r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
	331	r'(?:www\.)?invidious\.tiekoetter\.com',
	332	r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
	333	r'(?:www\.)?invidious\.nerdvpn\.de',
	334	r'(?:www\.)?invidious\.weblibre\.org',
	335	r'(?:www\.)?inv\.odyssey346\.dev',
	336	r'(?:www\.)?invidious\.dhusch\.de',
	337	r'(?:www\.)?iv\.melmac\.space',
	338	r'(?:www\.)?watch\.thekitty\.zone',
	339	r'(?:www\.)?invidious\.privacydev\.net',
	340	r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
	341	r'(?:www\.)?invidious\.drivet\.xyz',
	342	r'(?:www\.)?vid\.priv\.au',
	343	r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
	344	r'(?:www\.)?inv\.vern\.cc',
	345	r'(?:www\.)?invidious\.esmailelbob\.xyz',
	346	r'(?:www\.)?invidious\.sethforprivacy\.com',
	347	r'(?:www\.)?yt\.oelrichsgarcia\.de',
	348	r'(?:www\.)?yt\.artemislena\.eu',
	349	r'(?:www\.)?invidious\.flokinet\.to',
	350	r'(?:www\.)?invidious\.baczek\.me',
	351	r'(?:www\.)?y\.com\.sb',
	352	r'(?:www\.)?invidious\.epicsite\.xyz',
	353	r'(?:www\.)?invidious\.lidarshield\.cloud',
	354	r'(?:www\.)?yt\.funami\.tech',
	355	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	356	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	357	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	358	# youtube-dl invidious instances list
	359	r'(?:(?:www\|no)\.)?invidiou\.sh',
	360	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	361	r'(?:www\.)?invidious\.kabi\.tk',
	362	r'(?:www\.)?invidious\.mastodon\.host',
	363	r'(?:www\.)?invidious\.zapashcanon\.fr',
	364	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	365	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	366	r'(?:www\.)?invidious\.himiko\.cloud',
	367	r'(?:www\.)?invidious\.reallyancient\.tech',
	368	r'(?:www\.)?invidious\.tube',
	369	r'(?:www\.)?invidiou\.site',
	370	r'(?:www\.)?invidious\.site',
	371	r'(?:www\.)?invidious\.xyz',
	372	r'(?:www\.)?invidious\.nixnet\.xyz',
	373	r'(?:www\.)?invidious\.048596\.xyz',
	374	r'(?:www\.)?invidious\.drycat\.fr',
	375	r'(?:www\.)?inv\.skyn3t\.in',
	376	r'(?:www\.)?tube\.poal\.co',
	377	r'(?:www\.)?tube\.connect\.cafe',
	378	r'(?:www\.)?vid\.wxzm\.sx',
	379	r'(?:www\.)?vid\.mint\.lgbt',
	380	r'(?:www\.)?vid\.puffyan\.us',
	381	r'(?:www\.)?yewtu\.be',
	382	r'(?:www\.)?yt\.elukerio\.org',
	383	r'(?:www\.)?yt\.lelux\.fi',
	384	r'(?:www\.)?invidious\.ggc-project\.de',
	385	r'(?:www\.)?yt\.maisputain\.ovh',
	386	r'(?:www\.)?ytprivate\.com',
	387	r'(?:www\.)?invidious\.13ad\.de',
	388	r'(?:www\.)?invidious\.toot\.koeln',
	389	r'(?:www\.)?invidious\.fdn\.fr',
	390	r'(?:www\.)?watch\.nettohikari\.com',
	391	r'(?:www\.)?invidious\.namazso\.eu',
	392	r'(?:www\.)?invidious\.silkky\.cloud',
	393	r'(?:www\.)?invidious\.exonip\.de',
	394	r'(?:www\.)?invidious\.riverside\.rocks',
	395	r'(?:www\.)?invidious\.blamefran\.net',
	396	r'(?:www\.)?invidious\.moomoo\.de',
	397	r'(?:www\.)?ytb\.trom\.tf',
	398	r'(?:www\.)?yt\.cyberhost\.uk',
	399	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	400	r'(?:www\.)?qklhadlycap4cnod\.onion',
	401	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	402	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	403	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	404	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	405	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	406	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	407	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	408	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	409	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	410	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	411	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	412	r'(?:www\.)?piped\.kavin\.rocks',
	413	r'(?:www\.)?piped\.tokhmi\.xyz',
	414	r'(?:www\.)?piped\.syncpundit\.io',
	415	r'(?:www\.)?piped\.mha\.fi',
	416	r'(?:www\.)?watch\.whatever\.social',
	417	r'(?:www\.)?piped\.garudalinux\.org',
	418	r'(?:www\.)?piped\.rivo\.lol',
	419	r'(?:www\.)?piped-libre\.kavin\.rocks',
	420	r'(?:www\.)?yt\.jae\.fi',
	421	r'(?:www\.)?piped\.mint\.lgbt',
	422	r'(?:www\.)?il\.ax',
	423	r'(?:www\.)?piped\.esmailelbob\.xyz',
	424	r'(?:www\.)?piped\.projectsegfau\.lt',
	425	r'(?:www\.)?piped\.privacydev\.net',
	426	r'(?:www\.)?piped\.palveluntarjoaja\.eu',
	427	r'(?:www\.)?piped\.smnz\.de',
	428	r'(?:www\.)?piped\.adminforge\.de',
	429	r'(?:www\.)?watch\.whatevertinfoil\.de',
	430	r'(?:www\.)?piped\.qdi\.fi',
	431	r'(?:www\.)?piped\.video',
	432	r'(?:www\.)?piped\.aeong\.one',
	433	r'(?:www\.)?piped\.moomoo\.me',
	434	r'(?:www\.)?piped\.chauvet\.pro',
	435	r'(?:www\.)?watch\.leptons\.xyz',
	436	r'(?:www\.)?pd\.vern\.cc',
	437	r'(?:www\.)?piped\.hostux\.net',
	438	r'(?:www\.)?piped\.lunar\.icu',
	439	# Hyperpipe instances from https://hyperpipe.codeberg.page/
	440	r'(?:www\.)?hyperpipe\.surge\.sh',
	441	r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
	442	r'(?:www\.)?listen\.whatever\.social',
	443	r'(?:www\.)?music\.adminforge\.de',
	444	)
	445
	446	# extracted from account/account_menu ep
	447	# XXX: These are the supported YouTube UI and API languages,
	448	# which is slightly different from languages supported for translation in YouTube studio
	449	_SUPPORTED_LANG_CODES = [
	450	'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
	451	'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
	452	'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
	453	'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
	454	'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
	455	'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
	456	]
	457
	458	_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
	459
	460	_YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
	461	_YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
	462
	463	def ucid_or_none(self, ucid):
	464	return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None)
	465
	466	def handle_or_none(self, handle):
	467	return self._search_regex(rf'^({self._YT_HANDLE_RE})$', handle, '@-handle', default=None)
	468
	469	def handle_from_url(self, url):
	470	return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})',
	471	url, 'channel handle', default=None)
	472
	473	def ucid_from_url(self, url):
	474	return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})',
	475	url, 'channel id', default=None)
	476
	477	@functools.cached_property
	478	def _preferred_lang(self):
	479	"""
	480	Returns a language code supported by YouTube for the user preferred language.
	481	Returns None if no preferred language set.
	482	"""
	483	preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
	484	if not preferred_lang:
	485	return
	486	if preferred_lang not in self._SUPPORTED_LANG_CODES:
	487	raise ExtractorError(
	488	f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
	489	expected=True)
	490	elif preferred_lang != 'en':
	491	self.report_warning(
	492	f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
	493	return preferred_lang
	494
	495	def _initialize_consent(self):
	496	cookies = self._get_cookies('https://www.youtube.com/')
	497	if cookies.get('__Secure-3PSID'):
	498	return
	499	socs = cookies.get('SOCS')
	500	if socs and not socs.value.startswith('CAA'): # not consented

1

import base64

import calendar

import collections

import copy

import datetime

import enum

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

21

from .openload import PhantomJSwrapper

22

from ..compat import functools

23

from ..jsinterp import JSInterpreter

24

from ..networking.exceptions import HTTPError, network_exceptions

25

from ..utils import (

NO_DEFAULT,

ExtractorError,

LazyList,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

filter_dict,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'

69

# any clients starting with _ cannot be explicitly requested by the user

70

INNERTUBE_CLIENTS = {

71

'web': {

72

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

73

'INNERTUBE_CONTEXT': {

74

'client': {

75

'clientName': 'WEB',

76

'clientVersion': '2.20220801.00.00',

77

}

78

},

79

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

80

},

81

'web_embedded': {

82

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

83

'INNERTUBE_CONTEXT': {

84

'client': {

85

'clientName': 'WEB_EMBEDDED_PLAYER',

86

'clientVersion': '1.20220731.00.00',

87

},

88

},

89

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

90

},

91

'web_music': {

92

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

93

'INNERTUBE_HOST': 'music.youtube.com',

94

'INNERTUBE_CONTEXT': {

95

'client': {

96

'clientName': 'WEB_REMIX',

97

'clientVersion': '1.20220727.01.00',

98

}

99

},

100

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

101

},

102

'web_creator': {

103

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

104

'INNERTUBE_CONTEXT': {

105

'client': {

106

'clientName': 'WEB_CREATOR',

107

'clientVersion': '1.20220726.00.00',

108

}

109

},

110

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

111

},

112

'android': {

113

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

114

'INNERTUBE_CONTEXT': {

115

'client': {

116

'clientName': 'ANDROID',

117

'clientVersion': '17.31.35',

118

'androidSdkVersion': 30,

119

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

120

}

121

},

122

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

123

'REQUIRE_JS_PLAYER': False

124

},

125

'android_embedded': {

126

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

127

'INNERTUBE_CONTEXT': {

128

'client': {

129

'clientName': 'ANDROID_EMBEDDED_PLAYER',

130

'clientVersion': '17.31.35',

131

'androidSdkVersion': 30,

132

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

133

},

134

},

135

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

136

'REQUIRE_JS_PLAYER': False

137

},

138

'android_music': {

139

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

140

'INNERTUBE_CONTEXT': {

141

'client': {

142

'clientName': 'ANDROID_MUSIC',

143

'clientVersion': '5.16.51',

144

'androidSdkVersion': 30,

145

'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'

146

}

147

},

148

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

149

'REQUIRE_JS_PLAYER': False

150

},

151

'android_creator': {

152

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

153

'INNERTUBE_CONTEXT': {

154

'client': {

155

'clientName': 'ANDROID_CREATOR',

156

'clientVersion': '22.30.100',

157

'androidSdkVersion': 30,

158

'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'

159

},

160

},

161

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

162

'REQUIRE_JS_PLAYER': False

163

},

164

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

165

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

166

'ios': {

167

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

168

'INNERTUBE_CONTEXT': {

169

'client': {

170

'clientName': 'IOS',

171

'clientVersion': '17.33.2',

172

'deviceModel': 'iPhone14,3',

173

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

174

}

175

},

176

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

177

'REQUIRE_JS_PLAYER': False

178

},

179

'ios_embedded': {

180

'INNERTUBE_CONTEXT': {

181

'client': {

182

'clientName': 'IOS_MESSAGES_EXTENSION',

183

'clientVersion': '17.33.2',

184

'deviceModel': 'iPhone14,3',

185

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

186

},

187

},

188

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

189

'REQUIRE_JS_PLAYER': False

190

},

191

'ios_music': {

192

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

193

'INNERTUBE_CONTEXT': {

194

'client': {

195

'clientName': 'IOS_MUSIC',

196

'clientVersion': '5.21',

197

'deviceModel': 'iPhone14,3',

198

'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

199

},

200

},

201

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

202

'REQUIRE_JS_PLAYER': False

203

},

204

'ios_creator': {

205

'INNERTUBE_CONTEXT': {

206

'client': {

207

'clientName': 'IOS_CREATOR',

208

'clientVersion': '22.33.101',

209

'deviceModel': 'iPhone14,3',

210

'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

211

},

212

},

213

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

214

'REQUIRE_JS_PLAYER': False

215

},

216

# mweb has 'ultralow' formats

217

# See: https://github.com/yt-dlp/yt-dlp/pull/557

218

'mweb': {

219

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

220

'INNERTUBE_CONTEXT': {

221

'client': {

222

'clientName': 'MWEB',

223

'clientVersion': '2.20220801.00.00',

224

}

225

},

226

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

227

},

228

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

229

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

230

'tv_embedded': {

231

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

232

'INNERTUBE_CONTEXT': {

233

'client': {

234

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

235

'clientVersion': '2.0',

236

},

237

},

238

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

244

variant, *base = client_name.rsplit('.', 1)

245

if base:

246

return variant, base[0], variant

247

base, *variant = client_name.split('_', 1)

248

return client_name, base, variant[0] if variant else None

249

250

251

def short_client_name(client_name):
    """Build a short, upper-case label for an innertube client name.

    The name component returned by ``_split_innertube_client`` is cut into
    underscore-separated segments. The label combines the first four
    characters of the leading segment with the initial of every following
    segment, and the result is upper-cased.
    """
    name = _split_innertube_client(client_name)[0]
    # 'embedscreen' is rewritten to 'e_s' so it contributes two initials
    main, *parts = name.replace('embedscreen', 'e_s').split('_')
    return join_nonempty(main[:4], ''.join(x[0] for x in parts)).upper()

254

255

256

def build_innertube_clients():
    """Fill in defaults and priorities for every entry of INNERTUBE_CLIENTS.

    Mutates the module-level ``INNERTUBE_CLIENTS`` mapping in place:

    * missing ``INNERTUBE_API_KEY``, ``INNERTUBE_HOST``, ``REQUIRE_JS_PLAYER``
      and client ``hl`` values receive defaults;
    * each config gets a ``priority`` derived from its base client, lowered
      by 2 for ``embedded`` variants and by 3 for any other variant;
    * every client without a variant additionally gets a deep-copied
      ``<client>_embedscreen`` entry with ``clientScreen`` set to ``EMBED``.
    """
    THIRD_PARTY = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb')
    # NOTE(review): qualities() presumably ranks a name by its position in the
    # given sequence, so earlier BASE_CLIENTS entries rank higher - confirm in ..utils
    priority = qualities(BASE_CLIENTS[::-1])

    # Iterate over a snapshot: new '*_embedscreen' entries are added in the loop
    for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
        ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(client)
        ytcfg['priority'] = 10 * priority(base_client)

        if not variant:
            # The copy is taken after defaults and priority are set, so it inherits them
            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
            embedscreen['priority'] -= 3
        elif variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
            ytcfg['priority'] -= 2
        else:
            ytcfg['priority'] -= 3

282

283

284

build_innertube_clients()

285

286

287

class BadgeType(enum.Enum):
    """Badge categories; member values are assigned by ``enum.auto()``."""

    # Availability badges
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()
    # Other badges
    LIVE_NOW = enum.auto()
    VERIFIED = enum.auto()

295

296

297

class YoutubeBaseInfoExtractor(InfoExtractor):

298

"""Provide base functions for Youtube extractors"""

_RESERVED_NAMES = (

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

307

308

# _NETRC_MACHINE = 'youtube'

309

310

# If True it will raise an error if no login info is provided

311

_LOGIN_REQUIRED = False

312

313

_INVIDIOUS_SITES = (

314

# invidious-redirect websites

315

r'(?:www\.)?redirect\.invidious\.io',

316

r'(?:(?:www|dev)\.)?invidio\.us',

317

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

318

r'(?:www\.)?invidious\.pussthecat\.org',

319

r'(?:www\.)?invidious\.zee\.li',

320

r'(?:www\.)?invidious\.ethibox\.fr',

321

r'(?:www\.)?iv\.ggtyler\.dev',

322

r'(?:www\.)?inv\.vern\.i2p',

323

r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',

324

r'(?:www\.)?inv\.riverside\.rocks',

325

r'(?:www\.)?invidious\.silur\.me',

326

r'(?:www\.)?inv\.bp\.projectsegfau\.lt',

327

r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',

328

r'(?:www\.)?invidious\.slipfox\.xyz',

329

r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',

330

r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',

331

r'(?:www\.)?invidious\.tiekoetter\.com',

332

r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',

333

r'(?:www\.)?invidious\.nerdvpn\.de',

334

r'(?:www\.)?invidious\.weblibre\.org',

335

r'(?:www\.)?inv\.odyssey346\.dev',

336

r'(?:www\.)?invidious\.dhusch\.de',

337

r'(?:www\.)?iv\.melmac\.space',

338

r'(?:www\.)?watch\.thekitty\.zone',

339

r'(?:www\.)?invidious\.privacydev\.net',

340

r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',

341

r'(?:www\.)?invidious\.drivet\.xyz',

342

r'(?:www\.)?vid\.priv\.au',

343

r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',

344

r'(?:www\.)?inv\.vern\.cc',

345

r'(?:www\.)?invidious\.esmailelbob\.xyz',

346

r'(?:www\.)?invidious\.sethforprivacy\.com',

347

r'(?:www\.)?yt\.oelrichsgarcia\.de',

348

r'(?:www\.)?yt\.artemislena\.eu',

349

r'(?:www\.)?invidious\.flokinet\.to',

350

r'(?:www\.)?invidious\.baczek\.me',

351

r'(?:www\.)?y\.com\.sb',

352

r'(?:www\.)?invidious\.epicsite\.xyz',

353

r'(?:www\.)?invidious\.lidarshield\.cloud',

354

r'(?:www\.)?yt\.funami\.tech',

355

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

356

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

357

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

358

# youtube-dl invidious instances list

359

r'(?:(?:www|no)\.)?invidiou\.sh',

360

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

361

r'(?:www\.)?invidious\.kabi\.tk',

362

r'(?:www\.)?invidious\.mastodon\.host',

363

r'(?:www\.)?invidious\.zapashcanon\.fr',

364

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

365

r'(?:www\.)?invidious\.tinfoil-hat\.net',

366

r'(?:www\.)?invidious\.himiko\.cloud',

367

r'(?:www\.)?invidious\.reallyancient\.tech',

368

r'(?:www\.)?invidious\.tube',

369

r'(?:www\.)?invidiou\.site',

370

r'(?:www\.)?invidious\.site',

371

r'(?:www\.)?invidious\.xyz',

372

r'(?:www\.)?invidious\.nixnet\.xyz',

373

r'(?:www\.)?invidious\.048596\.xyz',

374

r'(?:www\.)?invidious\.drycat\.fr',

375

r'(?:www\.)?inv\.skyn3t\.in',

376

r'(?:www\.)?tube\.poal\.co',

377

r'(?:www\.)?tube\.connect\.cafe',

378

r'(?:www\.)?vid\.wxzm\.sx',

379

r'(?:www\.)?vid\.mint\.lgbt',

380

r'(?:www\.)?vid\.puffyan\.us',

381

r'(?:www\.)?yewtu\.be',

382

r'(?:www\.)?yt\.elukerio\.org',

383

r'(?:www\.)?yt\.lelux\.fi',

384

r'(?:www\.)?invidious\.ggc-project\.de',

385

r'(?:www\.)?yt\.maisputain\.ovh',

386

r'(?:www\.)?ytprivate\.com',

387

r'(?:www\.)?invidious\.13ad\.de',

388

r'(?:www\.)?invidious\.toot\.koeln',

389

r'(?:www\.)?invidious\.fdn\.fr',

390

r'(?:www\.)?watch\.nettohikari\.com',

391

r'(?:www\.)?invidious\.namazso\.eu',

392

r'(?:www\.)?invidious\.silkky\.cloud',

393

r'(?:www\.)?invidious\.exonip\.de',

394

r'(?:www\.)?invidious\.riverside\.rocks',

395

r'(?:www\.)?invidious\.blamefran\.net',

396

r'(?:www\.)?invidious\.moomoo\.de',

397

r'(?:www\.)?ytb\.trom\.tf',

398

r'(?:www\.)?yt\.cyberhost\.uk',

399

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

400

r'(?:www\.)?qklhadlycap4cnod\.onion',

401

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

402

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

403

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

404

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

405

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

406

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

407

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

408

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

409

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

410

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

411

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

412

r'(?:www\.)?piped\.kavin\.rocks',

413

r'(?:www\.)?piped\.tokhmi\.xyz',

414

r'(?:www\.)?piped\.syncpundit\.io',

415

r'(?:www\.)?piped\.mha\.fi',

416

r'(?:www\.)?watch\.whatever\.social',

417

r'(?:www\.)?piped\.garudalinux\.org',

418

r'(?:www\.)?piped\.rivo\.lol',

419

r'(?:www\.)?piped-libre\.kavin\.rocks',

420

r'(?:www\.)?yt\.jae\.fi',

421

r'(?:www\.)?piped\.mint\.lgbt',

422

r'(?:www\.)?il\.ax',

423

r'(?:www\.)?piped\.esmailelbob\.xyz',

424

r'(?:www\.)?piped\.projectsegfau\.lt',

425

r'(?:www\.)?piped\.privacydev\.net',

426

r'(?:www\.)?piped\.palveluntarjoaja\.eu',

427

r'(?:www\.)?piped\.smnz\.de',

428

r'(?:www\.)?piped\.adminforge\.de',

429

r'(?:www\.)?watch\.whatevertinfoil\.de',

430

r'(?:www\.)?piped\.qdi\.fi',

431

r'(?:www\.)?piped\.video',

432

r'(?:www\.)?piped\.aeong\.one',

433

r'(?:www\.)?piped\.moomoo\.me',

434

r'(?:www\.)?piped\.chauvet\.pro',

435

r'(?:www\.)?watch\.leptons\.xyz',

436

r'(?:www\.)?pd\.vern\.cc',

437

r'(?:www\.)?piped\.hostux\.net',

438

r'(?:www\.)?piped\.lunar\.icu',

439

# Hyperpipe instances from https://hyperpipe.codeberg.page/

440

r'(?:www\.)?hyperpipe\.surge\.sh',

441

r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',

442

r'(?:www\.)?listen\.whatever\.social',

443

r'(?:www\.)?music\.adminforge\.de',

444

)

445

446

# extracted from account/account_menu ep

447

# XXX: These are the supported YouTube UI and API languages,

448

# which is slightly different from languages supported for translation in YouTube studio

449

_SUPPORTED_LANG_CODES = [

450

'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',

451

'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',

452

'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',

453

'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',

454

'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',

455

'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'

456

]

457

458

_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}

459

460

_YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en

461

_YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'

462

463

def ucid_or_none(self, ucid):
    """Return `ucid` if the whole string matches _YT_CHANNEL_UCID_RE, otherwise None"""
    full_match_re = rf'^({self._YT_CHANNEL_UCID_RE})$'
    return self._search_regex(full_match_re, ucid, 'UC-id', default=None)

465

466

def handle_or_none(self, handle):
    """Return `handle` if the whole string matches _YT_HANDLE_RE, otherwise None"""
    full_match_re = rf'^({self._YT_HANDLE_RE})$'
    return self._search_regex(full_match_re, handle, '@-handle', default=None)

468

469

def handle_from_url(self, url):
    """
    Extract the @-handle that starts the path of `url`.
    `url` may be a youtube.com URL or a bare path beginning with '/'.
    Returns None if there is no match.
    """
    handle_path_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})'
    return self._search_regex(handle_path_re, url, 'channel handle', default=None)

472

473

def ucid_from_url(self, url):
    """
    Extract the channel id (UC...) that starts the path of `url`.
    `url` may be a youtube.com URL or a bare path beginning with '/'.
    Returns None if there is no match.
    """
    ucid_path_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})'
    return self._search_regex(ucid_path_re, url, 'channel id', default=None)

476

477

@functools.cached_property
def _preferred_lang(self):
    """
    Returns a language code supported by YouTube for the user preferred language.
    Returns None if no preferred language set.

    Raises ExtractorError (expected) if the configured code is not in _SUPPORTED_LANG_CODES.
    """
    lang_args = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])
    requested = lang_args[0]
    if not requested:
        return None

    if requested not in self._SUPPORTED_LANG_CODES:
        supported = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")
        raise ExtractorError(
            f'Unsupported language code: {requested}. Supported language codes (case-sensitive): {supported}.',
            expected=True)

    if requested != 'en':
        self.report_warning(
            f'Preferring "{requested}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return requested

494

495

def _initialize_consent(self):
    """
    Set the SOCS consent cookie to 'CAI' unless it is not needed.

    Nothing is changed when a '__Secure-3PSID' cookie is present, or when a
    SOCS cookie exists whose value does not start with 'CAA'.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    socs_cookie = jar.get('SOCS')
    # NOTE(review): the original comment tags the 'CAA' check as "not consented";
    # presumably a 'CAA...' value means consent has not been given yet - confirm
    must_set = not socs_cookie or socs_cookie.value.startswith('CAA')
    if must_set:
        # accept all (required for mixes)
        self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True)

503

504

def _initialize_pref(self):
    """
    Rewrite the PREF cookie so that 'hl' is the preferred language
    (default 'en') and 'tz' is 'UTC', keeping any other existing values.
    """
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if existing:
        try:
            settings = dict(urllib.parse.parse_qsl(existing.value))
        except ValueError:
            # Start from empty settings if the user's cookie cannot be parsed
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    settings['hl'] = self._preferred_lang or 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))

515

516

def _real_initialize(self):
    """Run the initialization steps in order: PREF cookie, consent cookie, login check"""
    init_steps = (
        self._initialize_pref,
        self._initialize_consent,
        self._check_login_required,
    )
    for step in init_steps:
        step()

520

521

def _check_login_required(self):
    """Call raise_login_required if _LOGIN_REQUIRED is set and no cookies were passed"""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')

524

525

# Matches the start of the `ytInitialData = ...` assignment, either in its
# plain form or as `window["ytInitialData"] = ...`
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
# Matches the start of the `ytInitialPlayerResponse = ...` assignment
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

527

528

def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for `client`, so callers may mutate it"""
    client_defaults = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(client_defaults)

530

531

def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the INNERTUBE_CLIENTS entry for `client`"""
    client_defaults = INNERTUBE_CLIENTS[client]
    return client_defaults['INNERTUBE_HOST']

533

534

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get but with fallback to default ytcfg client values when present

    The default ytcfg of `default_client` is only consulted when `ytcfg`
    yields no truthy value.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

538

539

def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from `ytcfg`, falling back to the defaults of `default_client`"""
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, str, default_client)

543

544

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from `ytcfg`, falling back to the defaults of `default_client`"""
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, str, default_client)

548

549

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname by priority: the 'innertube_host' extractor
    argument, then `req_api_hostname`, then the host of `default_client`
    ('web' if not given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

552

553

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, falling back to the defaults of `default_client`"""
    key_getter = lambda x: x['INNERTUBE_API_KEY']
    return self._ytcfg_get_safe(ytcfg, key_getter, str, default_client)

555

556

def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict from `ytcfg`, or from the defaults of
    `default_client` if `ytcfg` has none.

    The 'client' dict of the returned context (when present) is updated in
    place to force language and timezone.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section.update({
        'hl': self._preferred_lang or 'en',
        'timeZone': 'UTC',
        'utcOffsetMinutes': 0,
    })
    return context

_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for `origin`.

    The SAPISID value is looked up in the cookies once and cached in
    self._SAPISID (None: not looked up yet, False: no usable cookie).
    Returns None when no SAPISID is available.
    """
    time_now = round(time.time())

    if self._SAPISID is None:
        jar = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        source_cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
        if not (source_cookie and source_cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = source_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not jar.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{time_now} {self._SAPISID} {origin}'
    digest = hashlib.sha1(payload.encode()).hexdigest()
    return f'SAPISIDHASH {time_now}_{digest}'

590

591

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged with the innertube context) as JSON to the
    /youtubei/v1/<ep> endpoint and return the result of _download_json.

    @param context      context dict to send; extracted from the defaults of
                        `default_client` when not given
    @param headers      extra headers, applied on top of the generated API headers
    @param api_key      used unless the 'innertube_key' extractor argument is set;
                        falls back to the default key of `default_client`
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    endpoint_url = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
    return self._download_json(
        endpoint_url, video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})

608

609

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Return the JSON object that follows the ytInitialData assignment in `webpage`"""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)

611

612

@staticmethod

613

def _extract_session_index(*data):

614

"""

615

Index of current account in account list.

616

See: https://github.com/yt-dlp/yt-dlp/pull/519

617

"""

618

for ytcfg in data:

619

session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))

620

if session_index is not None:

return session_index

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """
    Return the identity token: 'ID_TOKEN' from `ytcfg` if present,
    otherwise searched for in `webpage`. Returns None if neither yields one.
    """
    cfg_token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if cfg_token:
        return cfg_token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

633

634

@staticmethod

635

def _extract_account_syncid(*args):

636

"""

637

Extract syncId required to download private playlists of secondary channels

638

@params response and/or ytcfg

639

"""

640

for data in args:

641

# ytcfg includes channel_syncid if on secondary channel

642

delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)

if delegated_sid:

return delegated_sid

sync_ids = (try_get(

data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],

647

lambda x: x['DATASYNC_ID']), str) or '').split('||')

648

if len(sync_ids) >= 2 and sync_ids[1]:

649

# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel

650

# and just "user_syncid||" for primary channel. We only want the channel_syncid

return sync_ids[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)

662

663

@functools.cached_property
def is_authenticated(self):
    """Whether a SAPISIDHASH authorization header can be generated (computed once per instance)"""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

666

667

def extract_ytcfg(self, video_id, webpage):
    """
    Parse the object passed to the first `ytcfg.set({...});` call in `webpage`.

    Returns an empty dict when `webpage` is empty, no such call is found,
    or the object cannot be parsed.
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped; an unescaped `$` anchor in
    # their place can never be followed by the `{...}` group
    ytcfg_json = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(ytcfg_json, video_id, fatal=False) or {}

674

675

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the request headers for an innertube API call.

    Explicitly passed values take precedence over values extracted from
    `ytcfg`. Headers without a value are dropped from the result.
    """
    api_host = self._select_api_hostname(api_hostname, default_client)
    origin = 'https://' + api_host

    client_name_id = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': str(client_name_id),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    # The auth user index is sent when known, and defaults to 0 when only a sync id is given
    if account_syncid or session_index is not None:
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth_header = self._generate_sapisidhash_header(origin)
    if auth_header is not None:
        headers['Authorization'] = auth_header
        headers['X-Origin'] = origin

    return filter_dict(headers)

700

701

def _download_ytcfg(self, client, video_id):
    """
    Download the config page of `client` and return its ytcfg.

    Only 'web', 'web_music' and 'web_embedded' have a config page; an empty
    dict is returned for any other client or when nothing could be extracted.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
    }
    page_url = config_pages.get(client)
    if not page_url:
        return {}

    client_label = client.replace("_", " ").strip()
    webpage = self._download_webpage(
        page_url, video_id, fatal=False, note=f'Downloading {client_label} client config')
    return self.extract_ytcfg(video_id, webpage) or {}

712

713

@staticmethod

714

def _build_api_continuation_query(continuation, ctp=None):

715

query = {

716

'continuation': continuation

717

}

718

# TODO: Inconsistency with clickTrackingParams.

719

# Currently we have a fixed ctp contained within context (from ytcfg)

720

# and a ctp in root query for continuation.

721

if ctp:

722

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's 'nextContinuationData'
    or 'reloadContinuationData'. Returns None if there is no continuation token.
    """
    continuation_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, continuation_getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

737

738

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.
    Returns None if `continuation_ep` is not a dict or carries no token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

747

748

@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`: from its next/reload
    continuation data if available, otherwise from the first usable
    continuationItemRenderer endpoint among its contents/items/rows.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    )
    return traverse_obj(
        renderer, endpoint_path, get_all=False, expected_type=cls._extract_continuation_ep_data)

758

759

@classmethod
def _extract_alerts(cls, data):
    """Yield (type, message) for every alert in data['alerts'] that has both a type and a text"""
    alert_entries = try_get(data, lambda x: x['alerts'], list) or []
    for entry in alert_entries:
        if not isinstance(entry, dict):
            continue
        for alert_renderer in entry.values():
            kind = alert_renderer.get('type')
            if not kind:
                continue
            text = cls._get_text(alert_renderer, 'text')
            if text:
                yield kind, text

771

772

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (type, message) alerts.

    With `fatal`, the last alert of type 'error' (case-insensitive) is raised
    as ExtractorError; all other alerts are reported as warnings, except
    messages listed in _IGNORED_WARNINGS.
    """
    fatal_alerts, soft_alerts = [], []
    for kind, message in alerts:
        if kind.lower() == 'error' and fatal:
            fatal_alerts.append([kind, message])
        elif message not in self._IGNORED_WARNINGS:
            soft_alerts.append([kind, message])

    # Only the last error is raised; the earlier ones are demoted to warnings
    to_warn = soft_alerts + fatal_alerts[:-1]
    for kind, message in to_warn:
        self.report_warning(f'YouTube said: {kind} - {message}', only_once=only_once)

    if fatal_alerts:
        raise ExtractorError('YouTube said: %s' % fatal_alerts[-1][1], expected=expected)

784

785

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and pass them, with any extra arguments, to _report_alerts"""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

787

788

def _extract_badges(self, badge_list: list):
    """
    Extract known BadgeType's from a list of badge renderers.

    A badge is identified by its icon type, then by its style, and as a last
    resort by a known English phrase in one of its labels.
    @returns [{'type': BadgeType}]
    """
    icon_type_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
        'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
        'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
        'CHECK': BadgeType.VERIFIED,
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
        'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
        'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM,
        'verified': BadgeType.VERIFIED,
        'official artist channel': BadgeType.VERIFIED,
    }

    found = []
    renderers = traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key)))
    for renderer in renderers:
        icon_type = traverse_obj(renderer, ('icon', 'iconType'), expected_type=str)
        known_type = icon_type_map.get(icon_type) or badge_style_map.get(traverse_obj(renderer, 'style'))
        if known_type:
            found.append({'type': known_type})
            continue

        # fallback, won't work in some languages
        label_text = traverse_obj(
            renderer, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip',
            get_all=False, expected_type=str, default='').lower()
        # First phrase of label_map (in definition order) contained in the label wins
        matched_phrase = next((phrase for phrase in label_map if phrase in label_text), None)
        if matched_phrase is not None:
            found.append({'type': label_map[matched_phrase]})

    return found

@staticmethod
def _has_badge(badges, badge_type):
    """Whether any entry of `badges` has the given 'type'"""
    matching = traverse_obj(badges, lambda _, v: v['type'] == badge_type)
    return bool(matching)

843

844

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths in `data`
    (or in `data` itself when no path is given), otherwise None.

    For each candidate object its 'simpleText' is preferred; otherwise the
    'text' values of its 'runs' are concatenated. A candidate that is itself
    a list is treated as the list of runs.
    @param max_runs  use at most this many runs when concatenating
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Without a `...`/list/tuple key in the path, the result is wrapped
            # so that the loop below treats it as a single candidate
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            run_limit = max_runs or len(runs)
            runs = runs[:min(len(runs), run_limit)]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str))
            if joined:
                return joined

def _get_count(self, data, *path_list):
    """
    Parse the count found in the text at the given paths of `data`.
    Falls back to a leading run of digits and commas (whitespace ignored)
    when parse_count cannot handle the text.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed

    compact_text = re.sub(r'\s', '', count_text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_digits)

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of {'url', 'height', 'width'}; entries without a valid URL are skipped
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...))
        for entry in entries:
            thumb_url = url_or_none(entry.get('url'))
            if not thumb_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumb_url:
                # Drop the query string
                thumb_url = thumb_url.partition('?')[0]
            results.append({
                'url': thumb_url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results

@staticmethod

def extract_relative_time(relative_time_text):

900

"""

901

Extracts a relative time from string and converts to dt object

902

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'

903

"""

904

905

# XXX: this could be moved to a general function in utils/_utils.py

906

# The relative time text strings are roughly the same as what

907

# Javascript's Intl.RelativeTimeFormat function generates.

908

# See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat

mobj = re.search(

relative_time_text)

if mobj:

start = mobj.group('start')

914

if start:

915

return datetime_from_str(start)

916

try:

917

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _parse_time_text(self, text):
    """
    Convert a time text to a UNIX timestamp.

    Relative times ('6 days ago') are tried first, then absolute dates.
    Returns None if `text` is empty or cannot be parsed; in the latter case a
    warning is reported once when the preferred language is unset or 'en'.
    """
    if not text:
        return None

    relative = self.extract_relative_time(text)
    if isinstance(relative, datetime.datetime):
        return calendar.timegm(relative.timetuple())

    timestamp = unified_timestamp(text)
    if not timestamp:
        # Retry with just the date-looking part of the lowercased text
        date_part = self._search_regex(
            (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
            text.lower(), 'time text', default=None)
        timestamp = unified_timestamp(date_part)

    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp

940

941

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` with retries and return the response.

    Network errors (other than HTTP 403/429), "unknown error" alerts and
    responses lacking `check_get_keys` are retried. Any other failure is
    passed to _error_or_warning according to `fatal`.
    Returns None when incomplete data persists after the retries and the
    'raise_incomplete_data' extractor argument is not set.
    """
    raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
    # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
    icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
    icd_rm = next(icd_retries)
    main_retries = iter(self.RetryManager())
    main_rm = next(main_retries)

    # Manual retry loop for multiple RetryManagers
    # The proper RetryManager MUST be advanced after an error
    # and its result MUST be checked if the manager is non fatal
    while True:
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            if not isinstance(err.cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)

            if isinstance(err.cause, HTTPError):
                # Surface the error message of a JSON error body, if there is one
                first_bytes = err.cause.response.read(512)
                if not is_html(first_bytes):
                    error_body = self._webpage_read_content(
                        err.cause.response, None, item_id, prefix=first_bytes) or '{}'
                    yt_error = try_get(
                        self._parse_json(error_body, item_id, fatal=False),
                        lambda x: x['error']['message'], str)
                    if yt_error:
                        self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if err.cause.status in (403, 429):
                    return self._error_or_warning(err, fatal=fatal)

            main_rm.error = err
            next(main_retries)
            continue

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube's servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' not in err.msg.lower():
                return self._error_or_warning(err, fatal=fatal)
            main_rm.error = err
            next(main_retries)
            continue

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if traverse_obj(response, *variadic(check_get_keys)):
            return response

        icd_rm.error = ExtractorError('Incomplete data received', expected=True)
        if not next(icd_retries, None):
            return None

@staticmethod

def is_music_url(url):

1010

return re.match(r'(https?://)?music\.youtube\.com/', url) is not None

1011

1012

def _extract_video(self, renderer):
    """
    Build a 'url' result for YoutubeIE from a video renderer.

    Fields missing from `renderer` itself are looked up in its reel player
    header where available. 'timestamp' is only filled in when the
    'approximate_date' extractor argument is set.
    """
    video_id = renderer.get('videoId')

    # Secondary source for the title, channel and time fields
    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        length_text = self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
        duration = parse_duration(length_text)
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not channel_id:
        channel_id = traverse_obj(reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))
    channel_id = self.ucid_or_none(channel_id)

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(traverse_obj(renderer, 'badges'))
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))

    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    # The count is stored as concurrent viewers for live and upcoming videos
    view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
               or self._get_text(reel_header, 'channelTitleText'))

    channel_handle = traverse_obj(renderer, (
        'shortBylineText', 'runs', ..., 'navigationEndpoint',
        (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
        expected_type=self.handle_from_url, get_all=False)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'uploader': channel,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        'timestamp': (self._parse_time_text(time_text)
                      if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
                      else None),
        'release_timestamp': scheduled_timestamp,
        'availability':
            'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            else self._availability(
                is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
                needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
                needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
                is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
        view_count_field: view_count,
        'live_status': live_status,
        'channel_is_verified': self._has_badge(owner_badges, BadgeType.VERIFIED) or None,
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

1109

IE_DESC = 'YouTube'

1110

# Verbose regex recognising every URL shape from which an 11-character video
# ID (named group "id") can be taken: youtube.com and mirror/proxy hostnames,
# Invidious instances, short-link hosts, and the naked ID itself.
# Group 1 is the optional "everything before the ID" part; the conditional
# `(?(1).+)?` only allows trailing text when that prefix was present.
# NOTE(review): the path-prefix alternative ("v/", "embed/", ...) was missing
# from this copy of the file and has been restored from the upstream pattern;
# confirm it against the canonical source.
# The literal is %-formatted, so it must not contain any other bare "%".
_VALID_URL = r"""(?x)^
    (
        (?:https?://|//)  # http(s):// or protocol-independent URL
        (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
            (?:www\.)?deturl\.com/www\.youtube\.com|
            (?:www\.)?pwnyoutube\.com|
            (?:www\.)?hooktube\.com|
            (?:www\.)?yourepeat\.com|
            tube\.majestyc\.net|
            %(invidious)s|
            youtube\.googleapis\.com)/  # the various hostnames, with wildcard subdomains
        (?:.*?\#/)?  # handle anchor (#/) redirect urls
        (?:  # the various things that can precede the ID:
            (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream))  # v/ or embed/ or e/ or shorts/ or live/
            |(?:  # or the v= param in all its forms
                (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                (?:\?|\#!?)  # the params delimiter ? or # or #!
                (?:.*?[&;])??  # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
                v=
            )
        ))
        |(?:
            youtu\.be|  # just youtu.be/xxxx
            vid\.plus|  # or vid.plus/xxxx
            zwearz\.com/watch|  # or zwearz.com/watch/xxxx
            %(invidious)s
        )/
        |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
        )
    )?  # all until now is optional -> you can pass the naked ID
    (?P<id>[0-9A-Za-z_-]{11})  # here is it! the YouTube video ID
    (?(1).+)?  # if we found the ID, everything can follow
    (?:\#|$)""" % {
    'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}

_EMBED_REGEX = [

r'''(?x)

(?:

<(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|

data-video-url=|

<embed[^>]+?src=|

embedSWF\(?:\s*|

<object[^>]+data=|

new\s+SWFObject\(

)

(["\'])

(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/

1157

(?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)

1158

\1''',

1159

# https://wordpress.org/plugins/lazy-load-for-videos/

1160

r'''(?xs)

1161

<a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"

1162

\s[^>]*\bclass="[^"]*\blazy-load-youtube''',

1163

]

1164

_RETURN_TYPE = 'video' # XXX: How to handle multifeed?

1165

1166

_PLAYER_INFO_RE = (

1167

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

1168

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

1169

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

1170

)

1171

_formats = {

1172

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

1173

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

1174

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

1175

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

1176

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

1177

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

1178

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

1179

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

1180

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

1181

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

1182

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

1183

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

1184

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

1185

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

1186

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

1187

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

1188

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

1189

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

1194

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

1195

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

1196

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

1197

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

1198

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

1199

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

1200

1201

# Apple HTTP Live Streaming

1202

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

1203

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

1204

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

1205

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

1206

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

1207

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

1208

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

1209

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

1210

1211

# DASH mp4 video

1212

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

1213

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

1214

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

1215

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

1216

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

1217

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

1218

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

1219

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

1220

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

1221

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

1222

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

1223

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

1224

1225

# Dash mp4 audio

1226

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

1227

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

1228

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

1229

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1230

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1231

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

1232

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

1233

1234

# Dash webm

1235

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1236

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1237

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1238

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1239

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1240

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1241

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1242

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1243

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1244

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1245

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1246

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1247

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1248

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1249

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1250

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1251

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1252

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1253

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1254

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1255

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1256

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1257

1258

# Dash webm audio

1259

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1260

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1261

1262

# Dash webm audio with opus inside

1263

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1264

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1265

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1266

1267

# RTMP (unnamed)

1268

'_rtmp': {'protocol': 'rtmp'},

1269

1270

# av01 video only formats sometimes served with "unknown" codecs

1271

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1272

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1273

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1274

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1275

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1276

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1277

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1278

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1279

}

1280

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1292

'channel': 'Philipp Hagemeister',

1293

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1294

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1295

'upload_date': '20121002',

1296

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1297

'categories': ['Science & Technology'],

1298

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1303

'playable_in_embed': True,

1304

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1305

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1310

'channel_follower_count': int,

1311

'uploader': 'Philipp Hagemeister',

1312

'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',

1313

'uploader_id': '@PhilippHagemeister',

1314

'heatmap': 'count:100',

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1319

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1324

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1325

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1326

'age_limit': 18,

1327

},

1328

'skip': 'Private video',

1329

},

1330

{

1331

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1332

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1337

'channel': 'Philipp Hagemeister',

1338

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1339

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1340

'upload_date': '20121002',

1341

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1342

'categories': ['Science & Technology'],

1343

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1348

'playable_in_embed': True,

1349

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1350

'live_status': 'not_live',

1351

'age_limit': 0,

1352

'comment_count': int,

1353

'channel_follower_count': int,

1354

'uploader': 'Philipp Hagemeister',

1355

'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',

1356

'uploader_id': '@PhilippHagemeister',

1357

'heatmap': 'count:100',

1358

},

1359

'params': {

1360

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1365

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1370

'description': '',

1371

'title': 'UHDTV TEST 8K VIDEO.mp4'

1372

},

1373

'params': {

1374

'youtube_include_dash_manifest': True,

1375

'format': '141',

1376

},

1377

'skip': 'format 141 not served anymore',

1378

},

1379

# DASH manifest with encrypted signature

1380

{

1381

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1386

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1387

'duration': 244,

1388

'upload_date': '20131011',

1389

'abr': 129.495,

1390

'like_count': int,

1391

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1392

'playable_in_embed': True,

1393

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1394

'view_count': int,

1395

'track': 'The Spark',

1396

'live_status': 'not_live',

1397

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1398

'channel': 'Afrojack',

1399

'tags': 'count:19',

1400

'availability': 'public',

1401

'categories': ['Music'],

1402

'age_limit': 0,

1403

'alt_title': 'The Spark',

1404

'channel_follower_count': int,

1405

'uploader': 'Afrojack',

1406

'uploader_url': 'https://www.youtube.com/@Afrojack',

1407

'uploader_id': '@Afrojack',

1408

},

1409

'params': {

1410

'youtube_include_dash_manifest': True,

1411

'format': '141/bestaudio[ext=m4a]',

1412

},

1413

},

1414

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1415

{

1416

'note': 'Embed allowed age-gate video',

1417

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1422

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1423

'duration': 142,

1424

'upload_date': '20140605',

1425

'age_limit': 18,

1426

'categories': ['Gaming'],

1427

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1428

'availability': 'needs_auth',

1429

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1430

'like_count': int,

1431

'channel': 'The Witcher',

1432

'live_status': 'not_live',

1433

'tags': 'count:17',

1434

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1435

'playable_in_embed': True,

1436

'view_count': int,

1437

'channel_follower_count': int,

1438

'uploader': 'The Witcher',

1439

'uploader_url': 'https://www.youtube.com/@thewitcher',

1440

'uploader_id': '@thewitcher',

1441

'comment_count': int,

1442

'channel_is_verified': True,

1443

'heatmap': 'count:100',

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1448

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1453

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1454

'upload_date': '20200408',

1455

'age_limit': 18,

1456

'availability': 'needs_auth',

1457

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1458

'channel': 'FlyingKitty',

1459

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1460

'view_count': int,

1461

'categories': ['Entertainment'],

1462

'live_status': 'not_live',

1463

'tags': ['Flyingkitty', 'godzilla 2'],

1464

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1465

'like_count': int,

1466

'duration': 177,

1467

'playable_in_embed': True,

1468

'channel_follower_count': int,

1469

'uploader': 'FlyingKitty',

1470

'uploader_url': 'https://www.youtube.com/@FlyingKitty900',

1471

'uploader_id': '@FlyingKitty900',

1472

'comment_count': int,

1473

'channel_is_verified': True,

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1478

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1479

'info_dict': {

1480

'id': 'Tq92D6wQ1mg',

1481

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1482

'ext': 'mp4',

1483

'upload_date': '20191228',

1484

'description': 'md5:17eccca93a786d51bc67646756894066',

1485

'age_limit': 18,

1486

'like_count': int,

1487

'availability': 'needs_auth',

1488

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1489

'view_count': int,

1490

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1491

'channel': 'Projekt Melody',

1492

'live_status': 'not_live',

1493

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1494

'playable_in_embed': True,

1495

'categories': ['Entertainment'],

1496

'duration': 106,

1497

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1498

'comment_count': int,

1499

'channel_follower_count': int,

1500

'uploader': 'Projekt Melody',

1501

'uploader_url': 'https://www.youtube.com/@ProjektMelody',

1502

'uploader_id': '@ProjektMelody',

},

},

{

'note': 'Non-Agegated non-embeddable video',

1507

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1512

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1513

'upload_date': '20130730',

1514

'track': 'Such mich find mich',

1515

'age_limit': 0,

1516

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1517

'like_count': int,

1518

'playable_in_embed': False,

1519

'creator': 'OOMPH!',

1520

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1521

'view_count': int,

1522

'alt_title': 'Such mich find mich',

1523

'duration': 210,

1524

'channel': 'Herr Lurik',

1525

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1526

'categories': ['Music'],

1527

'availability': 'public',

1528

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1529

'live_status': 'not_live',

1530

'artist': 'OOMPH!',

1531

'channel_follower_count': int,

1532

'uploader': 'Herr Lurik',

1533

'uploader_url': 'https://www.youtube.com/@HerrLurik',

1534

'uploader_id': '@HerrLurik',

},

},

{

'note': 'Non-bypassable age-gated video',

1539

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1540

'only_matching': True,

1541

},

1542

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1543

# YouTube Red ad is not captured for creator

1544

{

1545

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1551

'creator': 'deadmau5',

1552

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1553

'title': 'Deadmau5 - Some Chords (HD)',

1554

'alt_title': 'Some Chords',

1555

'availability': 'public',

1556

'tags': 'count:14',

1557

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1558

'view_count': int,

1559

'live_status': 'not_live',

1560

'channel': 'deadmau5',

1561

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1562

'like_count': int,

1563

'track': 'Some Chords',

1564

'artist': 'deadmau5',

1565

'playable_in_embed': True,

1566

'age_limit': 0,

1567

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1568

'categories': ['Music'],

1569

'album': 'Some Chords',

1570

'channel_follower_count': int,

1571

'uploader': 'deadmau5',

1572

'uploader_url': 'https://www.youtube.com/@deadmau5',

1573

'uploader_id': '@deadmau5',

1574

},

1575

'expected_warnings': [

1576

'DASH manifest missing',

1577

]

1578

},

1579

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1580

{

1581

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1587

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1588

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1589

'like_count': int,

1590

'release_timestamp': 1343767800,

1591

'playable_in_embed': True,

1592

'categories': ['Sports'],

1593

'release_date': '20120731',

1594

'channel': 'Olympics',

1595

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1596

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1597

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1598

'age_limit': 0,

1599

'availability': 'public',

1600

'live_status': 'was_live',

1601

'view_count': int,

1602

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1603

'channel_follower_count': int,

1604

'uploader': 'Olympics',

1605

'uploader_url': 'https://www.youtube.com/@Olympics',

1606

'uploader_id': '@Olympics',

1607

'channel_is_verified': True,

1608

},

1609

'params': {

1610

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1620

'duration': 85,

1621

'upload_date': '20110310',

1622

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1623

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1624

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1629

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1630

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1631

'view_count': int,

1632

'categories': ['People & Blogs'],

1633

'like_count': int,

1634

'live_status': 'not_live',

1635

'availability': 'unlisted',

1636

'comment_count': int,

1637

'channel_follower_count': int,

1638

'uploader': '孫ᄋᄅ',

1639

'uploader_url': 'https://www.youtube.com/@AllenMeow',

1640

'uploader_id': '@AllenMeow',

1641

},

1642

},

1643

# url_encoded_fmt_stream_map is empty string

1644

{

1645

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1650

'description': '',

1651

'upload_date': '20150404',

1652

},

1653

'params': {

1654

'skip_download': 'requires avconv',

1655

},

1656

'skip': 'This live event has ended.',

1657

},

1658

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1659

{

1660

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1665

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1666

'duration': 220,

1667

'upload_date': '20150625',

1668

'formats': 'mincount:31',

1669

},

1670

'skip': 'not actual anymore',

1671

},

1672

# DASH manifest with segment_list

1673

{

1674

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1675

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1680

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1681

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1682

},

1683

'params': {

1684

'youtube_include_dash_manifest': True,

1685

'format': '135', # bestvideo

1686

},

1687

'skip': 'This live event has ended.',

1688

},

1689

{

1690

# Multifeed videos (multiple cameras), URL can be of any Camera

1691

# TODO: fix multifeed titles

1692

'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',

1693

'info_dict': {

1694

'id': 'zaPI8MvL8pg',

1695

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',

1696

'description': 'md5:563ccbc698b39298481ca3c571169519',

},

'playlist': [{

'info_dict': {

'id': 'j5yGuxZ8lLU',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',

1703

'description': 'md5:563ccbc698b39298481ca3c571169519',

1704

'duration': 10120,

1705

'channel_follower_count': int,

1706

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1707

'availability': 'public',

1708

'playable_in_embed': True,

1709

'upload_date': '20131105',

1710

'categories': ['Gaming'],

1711

'live_status': 'was_live',

1712

'tags': 'count:24',

1713

'release_timestamp': 1383701910,

1714

'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',

1715

'comment_count': int,

1716

'age_limit': 0,

1717

'like_count': int,

1718

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1719

'channel': 'WiiLikeToPlay',

1720

'view_count': int,

1721

'release_date': '20131106',

1722

'uploader': 'WiiLikeToPlay',

1723

'uploader_id': '@WLTP',

1724

'uploader_url': 'https://www.youtube.com/@WLTP',

},

}, {

'info_dict': {

'id': 'zaPI8MvL8pg',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',

1731

'availability': 'public',

1732

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1733

'channel': 'WiiLikeToPlay',

1734

'channel_follower_count': int,

1735

'description': 'md5:563ccbc698b39298481ca3c571169519',

'duration': 10108,

'age_limit': 0,

'like_count': int,

'tags': 'count:24',

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1741

'release_timestamp': 1383701915,

1742

'comment_count': int,

1743

'upload_date': '20131105',

1744

'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',

1745

'release_date': '20131106',

1746

'playable_in_embed': True,

1747

'live_status': 'was_live',

1748

'categories': ['Gaming'],

1749

'view_count': int,

1750

'uploader': 'WiiLikeToPlay',

1751

'uploader_id': '@WLTP',

1752

'uploader_url': 'https://www.youtube.com/@WLTP',

},

}, {

'info_dict': {

'id': 'R7r3vfO7Hao',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',

1759

'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',

1760

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1761

'like_count': int,

1762

'availability': 'public',

1763

'playable_in_embed': True,

1764

'upload_date': '20131105',

1765

'description': 'md5:563ccbc698b39298481ca3c571169519',

1766

'channel_follower_count': int,

1767

'tags': 'count:24',

1768

'release_date': '20131106',

1769

'comment_count': int,

1770

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1771

'channel': 'WiiLikeToPlay',

1772

'categories': ['Gaming'],

1773

'release_timestamp': 1383701914,

1774

'live_status': 'was_live',

'age_limit': 0,

'duration': 10128,

'view_count': int,

'uploader': 'WiiLikeToPlay',

1779

'uploader_id': '@WLTP',

1780

'uploader_url': 'https://www.youtube.com/@WLTP',

1781

},

1782

}],

1783

'params': {'skip_download': True},

1784

},

1785

{

1786

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1787

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1788

'info_dict': {

1789

'id': 'gVfLd0zydlo',

1790

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1791

},

1792

'playlist_count': 2,

1793

'skip': 'Not multifeed anymore',

1794

},

1795

{

1796

'url': 'https://vid.plus/FlRa-iH7PGw',

1797

'only_matching': True,

1798

},

1799

{

1800

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1801

'only_matching': True,

1802

},

1803

{

1804

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1805

# Also tests cut-off URL expansion in video description (see

1806

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1807

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1808

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1813

'alt_title': 'Dark Walk',

1814

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1815

'duration': 133,

1816

'upload_date': '20151119',

1817

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1818

'track': 'Dark Walk',

1819

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1820

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1821

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1822

'categories': ['Film & Animation'],

1823

'view_count': int,

1824

'live_status': 'not_live',

1825

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1826

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1827

'tags': 'count:13',

1828

'availability': 'public',

1829

'channel': 'IronSoulElf',

1830

'playable_in_embed': True,

1831

'like_count': int,

1832

'age_limit': 0,

1833

'channel_follower_count': int

1834

},

1835

'params': {

1836

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1841

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1842

'only_matching': True,

1843

},

1844

{

1845

# Video with yt:stretch=17:0

1846

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1851

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1852

'upload_date': '20151107',

1853

},

1854

'params': {

1855

'skip_download': True,

1856

},

1857

'skip': 'This video does not exist.',

1858

},

1859

{

1860

# Video with incomplete 'yt:stretch=16:'

1861

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1862

'only_matching': True,

1863

},

1864

{

1865

# Video licensed under Creative Commons

1866

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1871

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1872

'duration': 721,

1873

'upload_date': '20150128',

1874

'license': 'Creative Commons Attribution license (reuse allowed)',

1875

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1876

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1877

'like_count': int,

1878

'age_limit': 0,

1879

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1880

'channel': 'The Berkman Klein Center for Internet & Society',

1881

'availability': 'public',

1882

'view_count': int,

1883

'categories': ['Education'],

1884

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1885

'live_status': 'not_live',

1886

'playable_in_embed': True,

1887

'channel_follower_count': int,

1888

'chapters': list,

1889

'uploader': 'The Berkman Klein Center for Internet & Society',

1890

'uploader_id': '@BKCHarvard',

1891

'uploader_url': 'https://www.youtube.com/@BKCHarvard',

1892

},

1893

'params': {

1894

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1903

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1904

'duration': 4060,

1905

'upload_date': '20151120',

1906

'license': 'Creative Commons Attribution license (reuse allowed)',

1907

'playable_in_embed': True,

1908

'tags': 'count:12',

1909

'like_count': int,

1910

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1911

'age_limit': 0,

1912

'availability': 'public',

1913

'categories': ['News & Politics'],

1914

'channel': 'Bernie Sanders',

1915

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1916

'view_count': int,

1917

'live_status': 'not_live',

1918

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1919

'comment_count': int,

1920

'channel_follower_count': int,

1921

'chapters': list,

1922

'uploader': 'Bernie Sanders',

1923

'uploader_url': 'https://www.youtube.com/@BernieSanders',

1924

'uploader_id': '@BernieSanders',

1925

'channel_is_verified': True,

1926

'heatmap': 'count:100',

1927

},

1928

'params': {

1929

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1934

'only_matching': True,

1935

},

1936

{

1937

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1938

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1939

'only_matching': True,

1940

},

1941

{

1942

# Rental video preview

1943

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1948

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1949

'upload_date': '20150811',

1950

'license': 'Standard YouTube License',

1951

},

1952

'params': {

1953

'skip_download': True,

1954

},

1955

'skip': 'This video is not available.',

1956

},

1957

{

1958

# YouTube Red video with episode data

1959

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1964

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1965

'duration': 2085,

1966

'upload_date': '20170118',

1967

'series': 'Mind Field',

1968

'season_number': 1,

1969

'episode_number': 1,

1970

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1971

'tags': 'count:12',

1972

'view_count': int,

1973

'availability': 'public',

1974

'age_limit': 0,

1975

'channel': 'Vsauce',

1976

'episode': 'Episode 1',

1977

'categories': ['Entertainment'],

1978

'season': 'Season 1',

1979

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1980

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1981

'like_count': int,

1982

'playable_in_embed': True,

1983

'live_status': 'not_live',

1984

'channel_follower_count': int,

1985

'uploader': 'Vsauce',

1986

'uploader_url': 'https://www.youtube.com/@Vsauce',

1987

'uploader_id': '@Vsauce',

1988

'comment_count': int,

1989

'channel_is_verified': True,

1990

},

1991

'params': {

1992

'skip_download': True,

1993

},

1994

'expected_warnings': [

1995

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

2000

# as inappropriate or offensive to some audiences.

2001

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

2006

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

2007

'duration': 965,

2008

'upload_date': '20140124',

2009

},

2010

'params': {

2011

'skip_download': True,

2012

},

2013

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

2018

'only_matching': True,

2019

},

2020

{

2021

# geo restricted to JP

2022

'url': 'sJL6WA-aGkQ',

2023

'only_matching': True,

2024

},

2025

{

2026

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

2027

'only_matching': True,

2028

},

2029

{

2030

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

2031

'only_matching': True,

2032

},

2033

{

2034

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

2035

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

2036

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

2041

'only_matching': True,

2042

},

2043

{

2044

# Video with unsupported adaptive stream type formats

2045

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

2050

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

2051

'duration': 433,

2052

'upload_date': '20130923',

2053

'formats': 'maxcount:10',

2054

},

2055

'params': {

2056

'skip_download': True,

2057

'youtube_include_dash_manifest': False,

2058

},

2059

'skip': 'not actual anymore',

2060

},

2061

{

2062

# Youtube Music Auto-generated description

2063

# TODO: fix metadata extraction

2064

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

2069

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

2070

'upload_date': '20190312',

2071

'artist': 'Stephen',

2072

'track': 'Voyeur Girl',

2073

'album': 'it\'s too much love to know my dear',

2074

'release_date': '20190313',

2075

'alt_title': 'Voyeur Girl',

2076

'view_count': int,

2077

'playable_in_embed': True,

2078

'like_count': int,

2079

'categories': ['Music'],

2080

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

2081

'channel': 'Stephen', # TODO: should be "Stephen - Topic"

2082

'uploader': 'Stephen',

2083

'availability': 'public',

2084

'creator': 'Stephen',

2085

'duration': 169,

2086

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

2087

'age_limit': 0,

2088

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

2089

'tags': 'count:11',

2090

'live_status': 'not_live',

2091

'channel_follower_count': int

2092

},

2093

'params': {

2094

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

2099

'only_matching': True,

2100

},

2101

{

2102

# invalid -> valid video id redirection

2103

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

2108

'description': 'md5:bf577a41da97918e94fa9798d9228825',

2109

'upload_date': '20090125',

2110

'artist': 'Panjabi MC',

2111

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

2112

'album': 'Beware of the Boys (Mundian To Bach Ke)',

2113

},

2114

'params': {

2115

'skip_download': True,

2116

},

2117

'skip': 'Video unavailable',

2118

},

2119

{

2120

# empty description results in an empty string

2121

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

2128

'view_count': int,

2129

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

2130

'like_count': int,

2131

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

2132

'tags': [],

2133

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

2134

'availability': 'public',

2135

'age_limit': 0,

2136

'categories': ['Pets & Animals'],

2137

'duration': 7,

2138

'playable_in_embed': True,

2139

'live_status': 'not_live',

2140

'channel': 'l\'Or Vert asbl',

2141

'channel_follower_count': int,

2142

'uploader': 'l\'Or Vert asbl',

2143

'uploader_url': 'https://www.youtube.com/@ElevageOrVert',

2144

'uploader_id': '@ElevageOrVert',

2145

},

2146

'params': {

2147

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

2152

# see [2] for an example with '};' inside ytInitialPlayerResponse

2153

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

2154

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

2155

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

2160

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

2161

'upload_date': '20130831',

2162

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

2163

'like_count': int,

2164

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

2165

'live_status': 'not_live',

2166

'categories': ['Education'],

2167

'availability': 'public',

2168

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

2169

'tags': 'count:12',

2170

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

2175

'comment_count': int,

2176

'channel_follower_count': int,

2177

'chapters': list,

2178

'uploader': 'kudvenkat',

2179

'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',

2180

'uploader_id': '@Csharp-video-tutorialsBlogspot',

2181

'channel_is_verified': True,

2182

'heatmap': 'count:100',

2183

},

2184

'params': {

2185

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

2190

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

2191

'only_matching': True,

2192

},

2193

{

2194

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

2195

'only_matching': True,

2196

},

2197

{

2198

# https://github.com/ytdl-org/youtube-dl/pull/28094

2199

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

2205

'upload_date': '20141120',

2206

'artist': 'The Cinematic Orchestra',

2207

'track': 'Burn Out',

2208

'album': 'Every Day',

2209

'like_count': int,

2210

'live_status': 'not_live',

2211

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2216

'creator': 'The Cinematic Orchestra',

2217

'channel': 'The Cinematic Orchestra',

2218

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

2219

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2220

'availability': 'public',

2221

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

2222

'categories': ['Music'],

2223

'playable_in_embed': True,

2224

'channel_follower_count': int,

2225

'uploader': 'The Cinematic Orchestra',

2226

'comment_count': int,

2227

},

2228

'params': {

2229

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

2234

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

2235

'only_matching': True,

2236

},

2237

{

2238

# controversial video, requires bpctr/contentCheckOk

2239

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

2244

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

2245

'upload_date': '20140716',

2246

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

2247

'duration': 170,

2248

'categories': ['News & Politics'],

2249

'view_count': int,

2250

'channel': 'CBS Mornings',

2251

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2252

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2253

'age_limit': 18,

2254

'availability': 'needs_auth',

2255

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2256

'like_count': int,

2257

'live_status': 'not_live',

2258

'playable_in_embed': True,

2259

'channel_follower_count': int,

2260

'uploader': 'CBS Mornings',

2261

'uploader_url': 'https://www.youtube.com/@CBSMornings',

2262

'uploader_id': '@CBSMornings',

2263

'comment_count': int,

2264

'channel_is_verified': True,

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2269

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2274

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2275

'upload_date': '20201120',

2276

'duration': 1456,

2277

'categories': ['Travel & Events'],

2278

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2279

'view_count': int,

2280

'channel': 'Walk around Japan',

2281

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2282

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2283

'age_limit': 0,

2284

'availability': 'public',

2285

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2286

'live_status': 'not_live',

2287

'playable_in_embed': True,

2288

'channel_follower_count': int,

2289

'uploader': 'Walk around Japan',

2290

'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',

2291

'uploader_id': '@walkaroundjapan7124',

2292

},

2293

'params': {

2294

'skip_download': True,

2295

},

2296

}, {

2297

# Has multiple audio streams

2298

'url': 'WaOKSUlf4TM',

2299

'only_matching': True

2300

}, {

2301

# Requires Premium: has format 141 when requested using YTM url

2302

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2303

'only_matching': True

2304

}, {

2305

# multiple subtitles with same lang_code

2306

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2307

'only_matching': True,

2308

}, {

2309

# Force use android client fallback

2310

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2311

'info_dict': {

2312

'id': 'YOelRv7fMxY',

2313

'title': 'DIGGING A SECRET TUNNEL Part 1',

2314

'ext': '3gp',

2315

'upload_date': '20210624',

2316

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2317

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2318

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2319

'duration': 596,

2320

'categories': ['Entertainment'],

2321

'view_count': int,

2322

'channel': 'colinfurze',

2323

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2324

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2325

'age_limit': 0,

2326

'availability': 'public',

2327

'like_count': int,

2328

'live_status': 'not_live',

2329

'playable_in_embed': True,

2330

'channel_follower_count': int,

2331

'chapters': list,

2332

'uploader': 'colinfurze',

2333

'uploader_url': 'https://www.youtube.com/@colinfurze',

2334

'uploader_id': '@colinfurze',

2335

'comment_count': int,

2336

'channel_is_verified': True,

2337

'heatmap': 'count:100',

2338

},

2339

'params': {

2340

'format': '17', # 3gp format available on android

2341

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2346

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2347

'only_matching': True,

2348

'params': {

2349

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2354

'only_matching': True,

2355

}, {

2356

'note': 'Storyboards',

2357

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2363

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2364

'upload_date': '20140324',

2365

'like_count': int,

2366

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2367

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2368

'view_count': int,

2369

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2370

'playable_in_embed': True,

2371

'tags': 'count:12',

2372

'availability': 'public',

2373

'channel': 'SciShow',

2374

'live_status': 'not_live',

2375

'duration': 248,

2376

'categories': ['Education'],

2377

'age_limit': 0,

2378

'channel_follower_count': int,

2379

'chapters': list,

2380

'uploader': 'SciShow',

2381

'uploader_url': 'https://www.youtube.com/@SciShow',

2382

'uploader_id': '@SciShow',

2383

'comment_count': int,

2384

'channel_is_verified': True,

2385

'heatmap': 'count:100',

2386

}, 'params': {'format': 'mhtml', 'skip_download': True}

2387

}, {

2388

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2389

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2394

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2395

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2396

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2401

'tags': 'count:23',

2402

'playable_in_embed': True,

2403

'live_status': 'not_live',

2404

'upload_date': '20220103',

2405

'like_count': int,

2406

'availability': 'public',

2407

'channel': 'Leon Nguyen',

2408

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2409

'comment_count': int,

2410

'channel_follower_count': int,

2411

'uploader': 'Leon Nguyen',

2412

'uploader_url': 'https://www.youtube.com/@LeonNguyen',

2413

'uploader_id': '@LeonNguyen',

2414

'heatmap': 'count:100',

2415

}

2416

}, {

2417

# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date

2418

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2423

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2424

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2425

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2430

'tags': 'count:23',

2431

'playable_in_embed': True,

2432

'live_status': 'not_live',

2433

'upload_date': '20220102',

2434

'like_count': int,

2435

'availability': 'public',

2436

'channel': 'Leon Nguyen',

2437

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2438

'comment_count': int,

2439

'channel_follower_count': int,

2440

'uploader': 'Leon Nguyen',

2441

'uploader_url': 'https://www.youtube.com/@LeonNguyen',

2442

'uploader_id': '@LeonNguyen',

2443

'heatmap': 'count:100',

2444

},

2445

'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}

2446

}, {

2447

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2448

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2453

'description': 'md5:978296ec9783a031738b684d4ebf302d',

2454

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2455

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2460

'tags': 'count:26',

2461

'playable_in_embed': True,

2462

'live_status': 'not_live',

2463

'release_timestamp': 1641172509,

2464

'release_date': '20220103',

2465

'upload_date': '20220103',

2466

'like_count': int,

2467

'availability': 'public',

2468

'channel': 'Quackity',

2469

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2470

'channel_follower_count': int,

2471

'uploader': 'Quackity',

2472

'uploader_id': '@Quackity',

2473

'uploader_url': 'https://www.youtube.com/@Quackity',

2474

'comment_count': int,

2475

'channel_is_verified': True,

2476

'heatmap': 'count:100',

2477

}

2478

},

2479

{ # continuous livestream. Microformat upload date should be preferred.

2480

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2481

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2482

'info_dict': {

2483

'id': 'kgx4WGK0oNU',

2484

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2485

'ext': 'mp4',

2486

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2487

'availability': 'public',

2488

'age_limit': 0,

2489

'release_timestamp': 1637975704,

2490

'upload_date': '20210619',

2491

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2492

'live_status': 'is_live',

2493

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2494

'channel': 'Abao in Tokyo',

2495

'channel_follower_count': int,

2496

'release_date': '20211127',

2497

'tags': 'count:39',

2498

'categories': ['People & Blogs'],

2499

'like_count': int,

2500

'view_count': int,

2501

'playable_in_embed': True,

2502

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2503

'concurrent_view_count': int,

2504

'uploader': 'Abao in Tokyo',

2505

'uploader_url': 'https://www.youtube.com/@abaointokyo',

2506

'uploader_id': '@abaointokyo',

2507

},

2508

'params': {'skip_download': True}

2509

}, {

2510

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2515

'upload_date': '20220323',

2516

'like_count': int,

2517

'availability': 'unlisted',

2518

'channel': 'Lesmiscore',

2519

'thumbnail': r're:^https?://.*\.jpg',

2520

'age_limit': 0,

2521

'categories': ['Music'],

2522

'view_count': int,

2523

'description': '',

2524

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2525

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2526

'live_status': 'not_live',

2527

'playable_in_embed': True,

2528

'channel_follower_count': int,

2529

'duration': 6,

2530

'tags': [],

2531

'uploader_id': '@lesmiscore',

2532

'uploader': 'Lesmiscore',

2533

'uploader_url': 'https://www.youtube.com/@lesmiscore',

2534

}

2535

}, {

2536

# Prefer primary title+description language metadata by default

2537

# Do not prefer translated description if primary is empty

2538

'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',

'info_dict': {

'id': 'el3E4MbxRqQ',

'ext': 'mp4',

'title': 'dlp test video 2 - primary sv no desc',

2543

'description': '',

2544

'channel': 'cole-dlp-test-acc',

2545

'tags': [],

2546

'view_count': int,

2547

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2548

'like_count': int,

2549

'playable_in_embed': True,

2550

'availability': 'unlisted',

2551

'thumbnail': r're:^https?://.*\.jpg',

2552

'age_limit': 0,

2553

'duration': 5,

2554

'live_status': 'not_live',

2555

'upload_date': '20220908',

2556

'categories': ['People & Blogs'],

2557

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2558

'uploader_url': 'https://www.youtube.com/@coletdjnz',

2559

'uploader_id': '@coletdjnz',

2560

'uploader': 'cole-dlp-test-acc',

2561

},

2562

'params': {'skip_download': True}

2563

}, {

2564

# Extractor argument: prefer translated title+description

2565

'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',

'info_dict': {

'id': 'gHKT4uU8Zng',

'ext': 'mp4',

'channel': 'cole-dlp-test-acc',

2570

'tags': [],

2571

'duration': 5,

2572

'live_status': 'not_live',

2573

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2574

'upload_date': '20220728',

2575

'view_count': int,

2576

'categories': ['People & Blogs'],

2577

'thumbnail': r're:^https?://.*\.jpg',

2578

'title': 'dlp test video title translated (fr)',

2579

'availability': 'public',

2580

'age_limit': 0,

2581

'description': 'dlp test video description translated (fr)',

2582

'playable_in_embed': True,

2583

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2584

'uploader_url': 'https://www.youtube.com/@coletdjnz',

2585

'uploader_id': '@coletdjnz',

2586

'uploader': 'cole-dlp-test-acc',

2587

},

2588

'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},

2589

'expected_warnings': [r'Preferring "fr" translated fields'],

2590

}, {

2591

'note': '6 channel audio',

2592

'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',

2593

'only_matching': True,

2594

}, {

2595

'note': 'Multiple HLS formats with same itag',

2596

'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',

'info_dict': {

'id': 'kX3nB4PpJko',

'ext': 'mp4',

'categories': ['Entertainment'],

2601

'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',

2602

'live_status': 'not_live',

2603

'duration': 937,

2604

'channel_follower_count': int,

2605

'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',

2606

'title': 'Last To Take Hand Off Jet, Keeps It!',

2607

'channel': 'MrBeast',

2608

'playable_in_embed': True,

2609

'view_count': int,

2610

'upload_date': '20221112',

2611

'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',

2612

'age_limit': 0,

2613

'availability': 'public',

2614

'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',

2615

'like_count': int,

2616

'tags': [],

2617

'uploader': 'MrBeast',

2618

'uploader_url': 'https://www.youtube.com/@MrBeast',

2619

'uploader_id': '@MrBeast',

2620

'comment_count': int,

2621

'channel_is_verified': True,

2622

'heatmap': 'count:100',

2623

},

2624

'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},

2625

}, {

2626

'note': 'Audio formats with Dynamic Range Compression',

2627

'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',

'info_dict': {

'id': 'Tq92D6wQ1mg',

'ext': 'webm',

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

2632

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

2633

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

2634

'channel_follower_count': int,

2635

'description': 'md5:17eccca93a786d51bc67646756894066',

2636

'upload_date': '20191228',

2637

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

2638

'playable_in_embed': True,

2639

'like_count': int,

2640

'categories': ['Entertainment'],

2641

'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',

2642

'age_limit': 18,

2643

'channel': 'Projekt Melody',

2644

'view_count': int,

2645

'availability': 'needs_auth',

2646

'comment_count': int,

2647

'live_status': 'not_live',

2648

'duration': 106,

2649

'uploader': 'Projekt Melody',

2650

'uploader_id': '@ProjektMelody',

2651

'uploader_url': 'https://www.youtube.com/@ProjektMelody',

2652

},

2653

'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},

2654

},

2655

{

2656

'url': 'https://www.youtube.com/live/qVv6vCqciTM',

'info_dict': {

'id': 'qVv6vCqciTM',

'ext': 'mp4',

'age_limit': 0,

'comment_count': int,

2662

'chapters': 'count:13',

2663

'upload_date': '20221223',

2664

'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',

2665

'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',

2666

'like_count': int,

2667

'release_date': '20221223',

2668

'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],

2669

'title': '【 #インターネット女クリスマス】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',

2670

'view_count': int,

2671

'playable_in_embed': True,

2672

'duration': 4438,

2673

'availability': 'public',

2674

'channel_follower_count': int,

2675

'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',

2676

'categories': ['Entertainment'],

2677

'live_status': 'was_live',

2678

'release_timestamp': 1671793345,

2679

'channel': 'さなちゃんねる',

2680

'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',

2681

'uploader': 'さなちゃんねる',

2682

'uploader_url': 'https://www.youtube.com/@sana_natori',

2683

'uploader_id': '@sana_natori',

2684

'channel_is_verified': True,

2685

'heatmap': 'count:100',

},

},

{

# Fallbacks when webpage and web client is unavailable

2690

'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',

'info_dict': {

'id': 'wSSmNUl9Snw',

'ext': 'mp4',

# 'categories': ['Science & Technology'],

2695

'view_count': int,

2696

'chapters': 'count:2',

2697

'channel': 'Scott Manley',

2698

'like_count': int,

2699

'age_limit': 0,

2700

# 'availability': 'public',

2701

'channel_follower_count': int,

2702

'live_status': 'not_live',

2703

'upload_date': '20170831',

2704

'duration': 682,

2705

'tags': 'count:8',

2706

'uploader_url': 'https://www.youtube.com/@scottmanley',

2707

'description': 'md5:f4bed7b200404b72a394c2f97b782c02',

2708

'uploader': 'Scott Manley',

2709

'uploader_id': '@scottmanley',

2710

'title': 'The Computer Hack That Saved Apollo 14',

2711

'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',

2712

'thumbnail': r're:^https?://.*\.webp',

2713

'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',

2714

'playable_in_embed': True,

2715

'comment_count': int,

2716

'channel_is_verified': True,

2717

'heatmap': 'count:100',

2718

},

2719

'params': {

2720

'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},

},

},

]

_WEBPAGE_TESTS = [

# YouTube <object> embed

2727

{

2728

'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',

2729

'md5': '873c81d308b979f0e23ee7e620b312a3',

'info_dict': {

'id': 'msN87y-iEx0',

'ext': 'mp4',

'title': 'Feynman: Mirrors FUN TO IMAGINE 6',

2734

'upload_date': '20080526',

2735

'description': 'md5:873c81d308b979f0e23ee7e620b312a3',

2736

'age_limit': 0,

2737

'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],

2738

'channel_id': 'UCCeo--lls1vna5YJABWAcVA',

2739

'playable_in_embed': True,

2740

'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',

2741

'like_count': int,

2742

'comment_count': int,

2743

'channel': 'Christopher Sykes',

2744

'live_status': 'not_live',

2745

'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',

2746

'availability': 'public',

2747

'duration': 195,

2748

'view_count': int,

2749

'categories': ['Science & Technology'],

2750

'channel_follower_count': int,

2751

'uploader': 'Christopher Sykes',

2752

'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',

2753

'uploader_id': '@ChristopherSykesDocumentaries',

2754

'heatmap': 'count:100',

2755

},

2756

'params': {

2757

'skip_download': True,

}

},

]

@classmethod
def suitable(cls, url):
    """Return False for URLs carrying a non-empty ``list`` query value; otherwise defer to the parent class."""
    # NOTE(review): the function-local import duplicates a module-level one;
    # presumably deliberate - confirm before removing.
    from ..utils import parse_qs

    list_id = parse_qs(url).get('list', [None])[0]
    return False if list_id else super().suitable(url)

2770

2771

def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and start with empty per-instance player caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS
    # _player_cache: cache-id tuple -> memoised result (or stored exception), see _cached()
    self._code_cache, self._player_cache = {}, {}

2775

2776

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):

2777

lock = threading.Lock()

2778

start_time = time.time()

2779

formats = [f for f in formats if f.get('is_from_start')]

2780

2781

def refetch_manifest(format_id, delay):

2782

nonlocal formats, start_time, is_live

2783

if time.time() <= start_time + delay:

2784

return

2785

2786

_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

2787

video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)

2788

microformats = traverse_obj(

2789

prs, (..., 'microformat', 'playerMicroformatRenderer'),

2790

expected_type=dict)

2791

_, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)

2792

is_live = live_status == 'is_live'

2793

start_time = time.time()

2794

2795

def mpd_feed(format_id, delay):

2796

"""

2797

@returns (manifest_url, manifest_stream_number, is_live) or None

2798

"""

2799

for retry in self.RetryManager(fatal=False):

2800

with lock:

2801

refetch_manifest(format_id, delay)

2802

2803

f = next((f for f in formats if f['format_id'] == format_id), None)

2804

if not f:

2805

if not is_live:

2806

retry.error = f'{video_id}: Video is no longer live'

2807

else:

2808

retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'

2809

continue

2810

return f['manifest_url'], f['manifest_stream_number'], is_live

return None

for f in formats:

f['is_live'] = is_live

2815

gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],

2816

live_start_time, mpd_feed, not is_live and f.copy())

2817

if is_live:

2818

f['fragments'] = gen

2819

f['protocol'] = 'http_dash_segments_generator'

2820

else:

2821

f['fragments'] = LazyList(gen({}))

2822

del f['is_from_start']

2823

2824

def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):

2825

FETCH_SPAN, MAX_DURATION = 5, 432000

2826

2827

mpd_url, stream_number, is_live = None, None, True

2828

2829

begin_index = 0

2830

download_start_time = ctx.get('start') or time.time()

2831

2832

lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION

2833

if lack_early_segments:

2834

self.report_warning(bug_reports_message(

2835

'Starting download from the last 120 hours of the live stream since '

2836

'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

2837

lack_early_segments = True

2838

2839

known_idx, no_fragment_score, last_segment_url = begin_index, 0, None

2840

fragments, fragment_base_url = None, None

2841

2842

def _extract_sequence_from_mpd(refresh_sequence, immediate):

2843

nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url

2844

# Obtain from MPD's maximum seq value

2845

old_mpd_url = mpd_url

2846

last_error = ctx.pop('last_error', None)

2847

expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403

2848

mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)

2849

or (mpd_url, stream_number, False))

2850

if not refresh_sequence:

2851

if expire_fast and not is_live:

2852

return False, last_seq

2853

elif old_mpd_url == mpd_url:

2854

return True, last_seq

2855

if manifestless_orig_fmt:

2856

fmt_info = manifestless_orig_fmt

2857

else:

2858

try:

2859

fmts, _ = self._extract_mpd_formats_and_subtitles(

2860

mpd_url, None, note=False, errnote=False, fatal=False)

2861

except ExtractorError:

2862

fmts = None

2863

if not fmts:

2864

no_fragment_score += 2

2865

return False, last_seq

2866

fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)

2867

fragments = fmt_info['fragments']

2868

fragment_base_url = fmt_info['fragment_base_url']

2869

assert fragment_base_url

2870

2871

_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))

2872

return True, _last_seq

2873

2874

self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')

2875

while is_live:

2876

fetch_time = time.time()

2877

if no_fragment_score > 30:

2878

return

2879

if last_segment_url:

2880

# Obtain from "X-Head-Seqnum" header value from each segment

2881

try:

2882

urlh = self._request_webpage(

2883

last_segment_url, None, note=False, errnote=False, fatal=False)

2884

except ExtractorError:

2885

urlh = None

2886

last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))

2887

if last_seq is None:

2888

no_fragment_score += 2

2889

last_segment_url = None

2890

continue

2891

else:

2892

should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)

2893

no_fragment_score += 2

2894

if not should_continue:

2895

continue

2896

2897

if known_idx > last_seq:

2898

last_segment_url = None

continue

last_seq += 1

if begin_index < 0 and known_idx < 0:

2904

# skip from the start when it's negative value

2905

known_idx = last_seq + begin_index

2906

if lack_early_segments:

2907

known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))

2908

try:

2909

for idx in range(known_idx, last_seq):

2910

# do not update sequence here or you'll get skipped some part of it

2911

should_continue, _ = _extract_sequence_from_mpd(False, False)

2912

if not should_continue:

2913

known_idx = idx - 1

2914

raise ExtractorError('breaking out of outer loop')

2915

last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)

2916

yield {

2917

'url': last_segment_url,

2918

'fragment_count': last_seq,

2919

}

2920

if known_idx == last_seq:

2921

no_fragment_score += 5

2922

else:

2923

no_fragment_score = 0

2924

known_idx = last_seq

2925

except ExtractorError:

2926

continue

2927

2928

if manifestless_orig_fmt:

2929

# Stop at the first iteration if running for post-live manifestless;

2930

# fragment count no longer increase since it starts

2931

break

2932

2933

time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2934

2935

def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the absolute player JS URL found in the given ytcfg dicts, or None.

    'PLAYER_JS_URL' is looked up first, then 'jsUrl' under 'WEB_PLAYER_CONTEXT_CONFIGS'.
    `webpage` is accepted but not used here.
    """
    relative_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', relative_url) if relative_url else None

2942

2943

def _download_player_url(self, video_id, fatal=False):

2944

res = self._download_webpage(

2945

'https://www.youtube.com/iframe_api',

2946

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2947

if res:

2948

player_version = self._search_regex(

2949

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2950

if player_version:

2951

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2952

2953

def _signature_cache_id(self, example_sig):

2954

""" Return a string representation of a signature """

2955

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2956

2957

@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the 'id' group of the first pattern in `_PLAYER_INFO_RE` that matches the player URL.

    Raises ExtractorError when none of the patterns match.
    """
    candidates = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_match = next(filter(None, candidates), None)
    if id_match is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_match.group('id')

2966

2967

def _load_player(self, video_id, player_url, fatal=True):

2968

player_id = self._extract_player_info(player_url)

2969

if player_id not in self._code_cache:

2970

code = self._download_webpage(

2971

player_url, video_id, fatal=fatal,

2972

note='Downloading player ' + player_id,

2973

errnote='Download of %s failed' % player_url)

2974

if code:

2975

self._code_cache[player_id] = code

2976

return self._code_cache.get(player_id)

2977

2978

def _extract_signature_function(self, video_id, player_url, example_sig):

2979

player_id = self._extract_player_info(player_url)

2980

2981

# Read from filesystem cache

2982

func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'

2983

assert os.path.basename(func_id) == func_id

2984

2985

self.write_debug(f'Extracting signature function {func_id}')

2986

cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None

2987

2988

if not cache_spec:

2989

code = self._load_player(video_id, player_url)

2990

if code:

2991

res = self._parse_sig_js(code)

2992

test_string = ''.join(map(chr, range(len(example_sig))))

2993

cache_spec = [ord(c) for c in res(test_string)]

2994

self.cache.store('youtube-sigfuncs', func_id, cache_spec)

2995

2996

return lambda s: ''.join(s[i] for i in cache_spec)

2997

2998

def _print_sig_code(self, func, example_sig):

2999

if not self.get_param('youtube_print_sig_code'):

3000

return

3001

3002

def gen_sig_code(idxs):

3003

def _genslice(start, end, step):

3004

starts = '' if start == 0 else str(start)

3005

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

3006

steps = '' if step == 1 else (':%d' % step)

3007

return f's[{starts}{ends}{steps}]'

3008

3009

step = None

3010

# Quelch pyflakes warnings - start will be set when step is set

3011

start = '(Never used)'

3012

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

3017

step = None

3018

continue

3019

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

3029

3030

test_string = ''.join(map(chr, range(len(example_sig))))

3031

cache_res = func(test_string)

3032

cache_spec = [ord(c) for c in cache_res]

3033

expr_code = ' + '.join(gen_sig_code(cache_spec))

3034

signature_id_tuple = '(%s)' % (

3035

', '.join(str(len(p)) for p in example_sig.split('.')))

3036

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

3037

' return %s\n') % (signature_id_tuple, expr_code)

3038

self.to_screen('Extracted signature function:\n' + code)

3039

3040

def _parse_sig_js(self, jscode):

3041

funcname = self._search_regex(

3042

(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

3043

r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

3044

r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})$decodeURIComponent\(h\.s$\)',

3045

r'\bc&&$c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c$\)',

3046

r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function$\s*a\s*$\s*{\s*a\s*=\s*a\.split$\s*""\s*$(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}$a,\d+$)?',

3047

r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function$\s*a\s*$\s*{\s*a\s*=\s*a\.split$\s*""\s*$',

3048

# Obsolete patterns

3049

r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

3050

r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',

3051

r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',

3052

r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

3053

r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

3054

r'\bc\s*&&\s*[a-zA-Z0-9]+\.set$[^,]+\s*,\s*\([^)]*$\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),

3055

jscode, 'Initial JS player signature function name', group='sig')

3056

3057

jsi = JSInterpreter(jscode)

3058

initial_function = jsi.extract_function(funcname)

3059

return lambda s: initial_function([s])

3060

3061

def _cached(self, func, *cache_id):

3062

def inner(*args, **kwargs):

3063

if cache_id not in self._player_cache:

3064

try:

3065

self._player_cache[cache_id] = func(*args, **kwargs)

3066

except ExtractorError as e:

3067

self._player_cache[cache_id] = e

3068

except Exception as e:

3069

self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)

3070

3071

ret = self._player_cache[cache_id]

3072

if isinstance(ret, Exception):

raise ret

return ret

return inner

def _decrypt_signature(self, s, video_id, player_url):

3078

"""Turn the encrypted s field into a working signature"""

3079

extract_sig = self._cached(

3080

self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))

3081

func = extract_sig(video_id, player_url, s)

3082

self._print_sig_code(func, s)

3083

return func(s)

3084

3085

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        get_nsig_func = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        decrypted = get_nsig_func(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        # Fall back to PhantomJS; if that is unavailable, surface the native failure
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e, only_once=True)

        args, func_body = func_code
        phantom_output = jsi.execute(
            f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
            video_id=video_id, note='Executing signature code')
        decrypted = phantom_output.strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted

3118

3119

def _extract_n_function_name(self, jscode):

3120

funcname, idx = self._search_regex(

3121

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

3122

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

if not idx:

return funcname

return json.loads(js_to_json(self._search_regex(

3127

rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,

3128

f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]

3129

3130

def _extract_n_function_code(self, video_id, player_url):
    """
    Return ``(jsi, player_id, func_code)`` for the player's "n" parameter function.

    `func_code` is an ``(argument names, body)`` pair. It is read from the
    'youtube-nsig' cache when available; otherwise it is extracted from the
    player JS and written to that cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because JS identifiers may contain `$`, a regex metacharacter
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code

3155

3156

def _extract_n_function_from_code(self, jsi, func_code):

3157

func = jsi.extract_function_from_code(*func_code)

def extract_nsig(s):

try:

ret = func([s])

except JSInterpreter.Exception:

3163

raise

3164

except Exception as e:

3165

raise JSInterpreter.Exception(traceback.format_exc(), cause=e)

3166

3167

if ret.startswith('enhanced_except_'):

3168

raise JSInterpreter.Exception('Signature function returned an exception')

return ret

return extract_nsig

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value is taken from ytcfg['STS'] when present and truthy, otherwise
    searched for in the player JS.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    player_code = self._load_player(video_id, player_url, fatal=fatal)
    if player_code:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts

3196

3197

def _mark_watched(self, video_id, player_responses):
    """
    Request the playback-tracking URLs from the player responses.

    The playback URL is requested first, then the watchtime URL; processing stops
    with a warning as soon as one of them is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # more consistent results setting it to right before the end
        reported_length = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_length) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = 0
            qs['et'] = video_length

        tracking_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=tracking_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

3237

3238

@classmethod
def _extract_from_webpage(cls, url, webpage):
    """
    Yield URL results for YouTube videos referenced by a third-party webpage.

    A `<link rel="alternate">` pointing at a YouTube watch URL is yielded alone and
    ends extraction; otherwise the parent's results are followed by lazyYT and
    "YouTube Video Importer" embeds.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for raw_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_embeds = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_embeds:
        # The video id is the last captured group
        yield cls.url_result(groups[-1], cls, groups[-1])

3261

3262

@classmethod
def extract_id(cls, url):
    """Return the id that `get_temp_id` derives from `url`; raise ExtractorError if it yields nothing."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

3268

3269

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found under 'playerOverlays' in `data`."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def chapter_start(chapter):
        # timeRangeStartMillis is scaled down by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters_helper(
        chapter_list,
        start_function=chapter_start,
        title_function=chapter_title,
        duration=duration)

3283

3284

def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Build chapters from the macro-marker lists in the engagement panels of `data`.

    Returns the first non-empty chapter list, or [] when no panel yields chapters.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    # Lazily evaluated: panels after the first one that yields chapters are not processed
    chapters_per_panel = (
        self._extract_chapters_helper(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        for contents in content_list)
    return next(filter(None, chapters_per_panel), [])

3296

3297

def _extract_heatmap(self, data):
    """
    Return heatmap entries ('start_time', 'end_time', 'value') from the
    'MARKER_TYPE_HEATMAP' marker lists in `data`, or None when nothing is found.
    """
    markers_list_path = ('payload', 'macroMarkersListEntity', 'markersList')
    marker_fields = {
        'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
        'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000},
        'value': ('intensityScoreNormalized', {float_or_none}),
    }
    heatmap = traverse_obj(data, (
        'frameworkUpdates', 'entityBatchUpdate', 'mutations',
        lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP',
        *markers_list_path, 'markers', ..., marker_fields))
    return heatmap or None

3306

3307

def _extract_comment(self, comment_renderer, parent=None):
    """
    Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. `parent` is stored under
    'parent', defaulting to 'root'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    info = {
        'id': comment_id,
        'text': self._get_text(comment_renderer, 'contentText'),
        'like_count': self._get_count(comment_renderer, 'voteCount'),
        'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
        'author': self._get_text(comment_renderer, 'authorText'),
        'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
        'parent': parent or 'root',
    }

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    # FIXME: non-standard, but we need a way of showing that it is an estimate.
    info['_time_text'] = time_text
    info['timestamp'] = self._parse_time_text(time_text)

    author_path = traverse_obj(comment_renderer, ('authorEndpoint', (
        ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))),
        expected_type=str, get_all=False)
    info['author_url'] = urljoin('https://www.youtube.com', author_path)

    author_is_uploader = traverse_obj(comment_renderer, 'authorIsChannelOwner')
    if author_is_uploader is not None:
        info['author_is_uploader'] = author_is_uploader

    action_buttons = traverse_obj(
        comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
    if action_buttons is not None:
        info['is_favorited'] = 'creatorHeart' in action_buttons

    badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
    if self._has_badge(badges, BadgeType.VERIFIED):
        info['author_is_verified'] = True

    if traverse_obj(comment_renderer, 'pinnedCommentBadge'):
        info['is_pinned'] = True

    return info

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generate comment dicts by following continuations from `root_continuation_data`.

    Calls itself for reply threads, passing the parent comment id as `parent` and
    sharing `tracker` (running counters and seen/pinned id sets) across calls.
    Raises `self.CommentsDisabled` when a top-level call produced no comments and
    the root data carries a message.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        found_continuation = None
        for content in contents:
            header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                header_renderer, 'countText', 'commentsCount')

            if expected_comment_count is not None:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            # 1 = new, 0 = top
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')

            sort_menu_item = try_get(
                header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            found_continuation = (
                self._extract_continuation_ep_data(sort_continuation_ep)
                or self._extract_continuation(sort_menu_item))
            if not found_continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title')) or (
                'top comments' if comment_sort_index == 0 else 'newest first')
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return found_continuation

    def extract_thread(contents):
        # A bare `yield` (None) tells the consumer below to stop extraction
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_id = comment['id']
            if comment.get('is_pinned'):
                tracker['pinned_comment_ids'].add(comment_id)
            # Sometimes YouTube may break and give us infinite looping comments.
            # See: https://github.com/yt-dlp/yt-dlp/issues/6290
            if comment_id in tracker['seen_comment_ids']:
                if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
                    # Pinned comments may appear a second time in newest first sort
                    # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                    continue
                self.report_warning(
                    'Detected YouTube comments looping. Stopping comment extraction '
                    f'{"for this thread" if parent else ""} as we probably cannot get any more.')
                yield
            else:
                tracker['seen_comment_ids'].add(comment['id'])

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if replies_renderer:
                tracker['current_page_thread'] += 1
                reply_entries = self._comment_entries(
                    replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                yield from itertools.islice(reply_entries, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': None,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
            'seen_comment_ids': set(),
            'pinned_comment_ids': set(),
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Missing or unparsable values fall back to sys.maxsize
    limit_args = self._configuration_arg('max_comments', ) + [''] * 4
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(value, default=sys.maxsize) for value in limit_args)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    continuation_items_path = (
        'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
        # Ignore check if YouTube says the comment count is 0.
        check_get_keys = None
        if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
            check_get_keys = [[*continuation_items_path, ..., (
                'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=check_get_keys)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent:
                if self.get_param('ignoreerrors') in (True, 'only_download'):
                    self.report_warning(
                        'Received incomplete data for a comment reply thread and retrying did not help. '
                        'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
                    return
                else:
                    raise ExtractorError(
                        'Incomplete data received for comment reply thread. '
                        'Pass --ignore-errors to ignore and allow rest of comments to download.',
                        expected=True)
            raise
        is_forced_continuation = False
        continuation = None
        for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
            if is_first_continuation:
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled

3539

3540

@staticmethod
def _generate_comment_continuation(video_id):
    """
    Build the initial continuation token for the comment section of a video.

    The token is a fixed binary template with ``video_id`` embedded twice,
    returned as base64 text.
    """
    template = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
    encoded = base64.b64encode(template.encode())
    return encoded.decode()

3547

3548

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry point for comment extraction.

    Returns a lazy iterator over the comment entries, limited by the first
    value of the ``max_comments`` extractor argument (no limit when it is
    not an integer). ``webpage`` is accepted but not used here.
    """
    def _iter_comments(sections):
        # Pick the first item section that is flagged as the comment section
        comment_section = None
        for section in traverse_obj(sections, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_iter_comments(contents), 0, limit)

3558

3559

@staticmethod
def _get_checkok_params():
    """Return a fresh dict with both ``contentCheckOk`` and ``racyCheckOk`` set to True."""
    return dict.fromkeys(('contentCheckOk', 'racyCheckOk'), True)

3562

3563

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player API query.

    @param sts  Signature timestamp; included as ``signatureTimestamp``
                only when it is not None.
    @returns    Dict with ``playbackContext`` plus the check-ok parameters.
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    player_context = {'playbackContext': {'contentPlaybackContext': playback}}
    player_context.update(cls._get_checkok_params())
    return player_context

@staticmethod
def _is_agegated(player_response):
    """Tell whether a player response looks age-gated.

    True when ``playabilityStatus`` carries a ``desktopLegacyAgeGateReason``
    or when its ``status``/``reason`` contains one of the known markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    age_gate_markers = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    observed = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')))
    # NOTE(review): the substring match is case-sensitive, while _is_unplayable
    # compares the status against upper-case 'UNPLAYABLE' - confirm that the
    # lower-case status markers above can actually match.
    for marker in age_gate_markers:
        for text in observed:
            if marker in text:
                return True
    return False

3588

3589

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the response's ``playabilityStatus.status`` is exactly ``'UNPLAYABLE'``."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3592

3593

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Query the ``player`` API endpoint on behalf of one client.

    @param client       Client name; selects the default headers/context.
    @param player_url   Player JS URL; when falsy no signature timestamp
                        is looked up.
    @returns            The API response, or None when it is falsy.

    ``smuggled_data`` is accepted but not used by this method.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    if _split_innertube_client(client)[0] == 'android':
        yt_query['params'] = 'CgIQBg=='

    # A user-supplied player_params value overrides the android default above
    user_params = self._configuration_arg('player_params', [None], casesense=True)[0]
    if user_params:
        yt_query['params'] = user_params

    yt_query.update(self._generate_player_context(sts))

    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client, note=note)
    return response or None

3618

3619

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the ``player_client`` extractor argument into client names.

    Each requested value is either a known public client (one whose name
    does not start with ``_``), ``default`` or ``all``; anything else is
    skipped with a warning. When nothing valid was requested the defaults
    are used. For music URLs the ``<client>_music`` variant of every
    selected client is added when such a client exists.

    @returns    The selected client names, de-duplicated via ``orderedSet``.
    """
    requested_clients = []
    default = ['ios', 'android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that later extension never touches the defaults list itself
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending the list with a generator that
        # iterates the very same list would also visit the freshly added
        # names and only stops because no '*_music_music' client exists.
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3642

3643

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for all requested clients.

    Clients are processed in the given order; extra fallback clients may be
    queued while iterating (see ``append_client``). An error from one client
    is remembered and only raised at the end if no response was collected at
    all; otherwise it is reported as a warning.

    @returns    ``(player_responses, player_url)``
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed copy, used as a stack: pop() yields clients in requested order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client not in INNERTUBE_CLIENTS:
                continue
            # The first known client ends the search even if it was already queued
            if actual_client not in all_clients:
                clients.append(client_name)
                all_clients.add(actual_client)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        is_web = client == 'web'

        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                # Save client name for introspection later
                name = short_client_name(client)
                sd = traverse_obj(pr, ('streamingData', {dict})) or {}
                sd[STREAMING_DATA_CLIENT_NAME] = name
                for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
                    f[STREAMING_DATA_CLIENT_NAME] = name
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3730

3731

def _needs_live_processing(self, live_status, duration):
    """Return ``live_status`` when live-from-start processing is needed, else None.

    That is the case for ``'is_live'`` when the ``live_from_start`` param is
    set, and for ``'post_live'`` when the duration exceeds two hours.
    """
    if live_status == 'is_live':
        if self.get_param('live_from_start'):
            return live_status
    elif live_status == 'post_live':
        if (duration or 0) > 2 * 3600:
            return live_status
    return None

3735

3736

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generate all formats described by ``streaming_data``.

    This is a generator: it yields one dict per format (first the direct
    https formats, then the formats from HLS and DASH manifests) and, as its
    very last item, the merged subtitles dict collected from the manifests.

    @param streaming_data   Iterable of ``streamingData`` dicts
    @param player_url       Player JS URL used for signature/nsig handling;
                            may be None
    @param live_status      Live status string of the video, or None
    @param duration         Video duration in seconds, or None
    """
    CHUNK_SIZE = 10 << 20
    itags, stream_ids = collections.defaultdict(set), []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
    format_types = self._configuration_arg('formats')
    all_formats = 'duplicate' in format_types
    if self._configuration_arg('include_duplicate_formats'):
        all_formats = True
        self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
                                            'Use formats=duplicate extractor argument instead')

    def build_fragments(f):
        # One fragment per CHUNK_SIZE byte range of the file
        return LazyList({
            'url': update_url_query(f['url'], {
                'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}'
            })
        } for range_start in range(0, f['filesize'], CHUNK_SIZE))

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if not all_formats:
            if stream_id in stream_ids:
                continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null value, not just a missing key
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                throttled = True

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)

        client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
        # quality is None when the format carries neither 'quality' nor
        # 'audioQuality'; fall back to an empty name instead of crashing
        name = fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '')
        fps = int_or_none(fmt.get('fps')) or 0
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                join_nonempty(audio_track.get('displayName'),
                              language_preference > 0 and ' (default)', delim=''),
                name, fmt.get('isDrc') and 'DRC',
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED',
                (self.get_param('verbose') or all_formats) and client_name,
                delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
                                  + (100 if 'Premium' in name else 0)),
            'fps': fps if fps > 1 else None,  # For some formats, fps is wrongly returned as 1
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else '') or None,
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
        if single_stream and dct.get('ext'):
            dct['container'] = dct['ext'] + '_dash'

        if (all_formats or 'dashy' in format_types) and dct['filesize']:
            yield {
                **dct,
                'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
                'protocol': 'http_dash_segments',
                'fragments': build_fragments(dct),
            }
        if all_formats or 'dashy' not in format_types:
            dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
            yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = 'incomplete' not in format_types
    if self._configuration_arg('include_incomplete_formats'):
        skip_bad_formats = False
        self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, client_name, itag):
        # Returns False for a format whose (protocol, language) was already
        # seen for this itag; otherwise normalizes the format in place
        key = (proto, f.get('language'))
        if not all_formats and key in itags[itag]:
            return False
        itags[itag].add(key)

        if itag and all_formats:
            f['format_id'] = f'{itag}-{proto}'
        elif any(p != proto for p, _ in itags[itag]):
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        if f.get('source_preference') is None:
            f['source_preference'] = -1

        if itag in ('616', '235'):
            f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
            f['source_preference'] += 100

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Borrow the quality of the https format with the closest height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        if self.get_param('verbose') or all_formats:
            f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
        if f.get('fps') and f['fps'] <= 1:
            del f['fps']

        if proto == 'hls' and f.get('has_drm'):
            f['has_drm'] = 'maybe'
            f['source_preference'] -= 5
        return True

    subtitles = {}
    for sd in streaming_data:
        client_name = sd.get(STREAMING_DATA_CLIENT_NAME)

        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', client_name, self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', client_name, f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The subtitles dict is always the final item of the generator
    yield subtitles

def _extract_storyboard(self, player_responses, duration):
    """Generate one ``mhtml`` storyboard format per entry of the storyboard spec.

    The spec is a ``|``-separated string whose first field is the base URL;
    every following field is a ``#``-separated description of one storyboard.
    Malformed descriptions are skipped with a warning.

    @param duration     Video duration in seconds. May be None/0, in which
                        case ``fps`` and the fragment durations are None
                        instead of raising on the division.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() removes the original first field
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count if duration else None
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration if duration else None,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may cover less time than the others
                'duration': (min(fragment_duration, duration - (j * fragment_duration))
                             if fragment_duration else None),
            } for j in range(math.ceil(fragment_count))],
        }

4023

4024

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and all player responses.

    The webpage download is skipped when ``webpage`` is listed in the
    ``player_skip`` extractor argument; ``webpage`` is then None.

    @returns    ``(webpage, master_ytcfg, player_responses, player_url)``
    """
    webpage = None
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    if not skip_webpage:
        page_query = {'bpctr': '9999999999', 'has_verified': '1'}
        player_params = self._configuration_arg('player_params', [None], casesense=True)[0]
        if player_params:
            page_query['pp'] = player_params
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=page_query)

    # Fall back to the built-in default config when the page yields none
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url

4041

4042

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Work out the live status and extract all formats and subtitles.

    @returns    ``(live_broadcast_details, live_status, streaming_data,
                formats, subtitles)``
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The first matching condition wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The generator yields the formats followed by the subtitles as last item
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    every_format_has_drm = all(fmt.get('has_drm') for fmt in formats)
    if every_format_has_drm:
        # If there are no formats that definitely don't have DRM, all have DRM
        for fmt in formats:
            fmt['has_drm'] = True

    return live_broadcast_details, live_status, streaming_data, formats, subtitles

4064

4065

def _real_extract(self, url):

4066

url, smuggled_data = unsmuggle_url(url, {})

4067

video_id = self._match_id(url)

4068

4069

base_url = self.http_scheme() + '//www.youtube.com/'

4070

webpage_url = base_url + 'watch?v=' + video_id

4071

4072

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

4073

4074

playability_statuses = traverse_obj(

4075

player_responses, (..., 'playabilityStatus'), expected_type=dict)

4076

4077

trailer_video_id = get_first(

4078

playability_statuses,

4079

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

4080

expected_type=str)

4081

if trailer_video_id:

4082

return self.url_result(

4083

trailer_video_id, self.ie_key(), trailer_video_id)

4084

4085

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

4086

if webpage else (lambda x: None))

4087

4088

video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)

4089

microformats = traverse_obj(

4090

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

4091

expected_type=dict)

4092

4093

translated_title = self._get_text(microformats, (..., 'title'))

4094

video_title = (self._preferred_lang and translated_title

4095

or get_first(video_details, 'title') # primary

4096

or translated_title

4097

or search_meta(['og:title', 'twitter:title', 'title']))

4098

translated_description = self._get_text(microformats, (..., 'description'))

4099

original_description = get_first(video_details, 'shortDescription')

4100

video_description = (

4101

self._preferred_lang and translated_description

4102

# If original description is blank, it will be an empty string.

4103

# Do not prefer translated description in this case.

4104

or original_description if original_description is not None else translated_description)

4105

4106

multifeed_metadata_list = get_first(

4107

player_responses,

4108

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

4109

expected_type=str)

4110

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

4111

if self.get_param('noplaylist'):

4112

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

4117

# Unquote should take place before split on comma (,) since textual

4118

# fields may contain comma as well (see

4119

# https://github.com/ytdl-org/youtube-dl/issues/8536)

4120

feed_data = urllib.parse.parse_qs(

4121

urllib.parse.unquote_plus(feed))

4122

4123

def feed_entry(name):

4124

return try_get(

4125

feed_data, lambda x: x[name][0], str)

4126

4127

feed_id = feed_entry('id')

4128

if not feed_id:

4129

continue

4130

feed_title = feed_entry('title')

4131

title = video_title

4132

if feed_title:

4133

title += ' (%s)' % feed_title

4134

entries.append({

4135

'_type': 'url_transparent',

4136

'ie_key': 'Youtube',

4137

'url': smuggle_url(

4138

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

4139

{'force_singlefeed': True}),

4140

'title': title,

4141

})

4142

feed_ids.append(feed_id)

4143

self.to_screen(

4144

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

4145

% (', '.join(feed_ids), video_id))

4146

return self.playlist_result(

4147

entries, video_id, video_title, video_description)

4148

4149

duration = (int_or_none(get_first(video_details, 'lengthSeconds'))

4150

or int_or_none(get_first(microformats, 'lengthSeconds'))

4151

or parse_duration(search_meta('duration')) or None)

4152

4153

live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \

4154

self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

4155

if live_status == 'post_live':

4156

self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

4157

4158

if not formats:

4159

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

4160

self.report_drm(video_id)

4161

pemr = get_first(

4162

playability_statuses,

4163

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

4164

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

4165

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

4166

if subreason:

4167

if subreason == 'The uploader has not made this video available in your country.':

4168

countries = get_first(microformats, 'availableCountries')

4169

if not countries:

4170

regions_allowed = search_meta('regionsAllowed')

4171

countries = regions_allowed.split(',') if regions_allowed else None

4172

self.raise_geo_restricted(subreason, countries, metadata_available=True)

4173

reason += f'. {subreason}'

4174

if reason:

4175

self.raise_no_formats(reason, expected=True)

4176

4177

keywords = get_first(video_details, 'keywords', expected_type=list) or []

4178

if not keywords and webpage:

4179

keywords = [

4180

unescapeHTML(m.group('content'))

4181

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

4182

for keyword in keywords:

4183

if keyword.startswith('yt:stretch='):

4184

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

4185

if mobj:

4186

# NB: float is intentional for forcing float division

4187

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

4192

f['stretched_ratio'] = ratio

4193

break

4194

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

4195

thumbnail_url = search_meta(['og:image', 'twitter:image'])

4196

if thumbnail_url:

4197

thumbnails.append({

4198

'url': thumbnail_url,

4199

})

4200

original_thumbnails = thumbnails.copy()

4201

4202

# The best resolution thumbnails sometimes does not appear in the webpage

4203

# See: https://github.com/yt-dlp/yt-dlp/issues/340

4204

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

4205

thumbnail_names = [

4206

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

4207

# in resolution, these are not the custom thumbnail. So de-prioritize them

4208

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

4209

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

4210

]

4211

n_thumbnail_names = len(thumbnail_names)

4212

thumbnails.extend({

4213

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

4214

video_id=video_id, name=name, ext=ext,

4215

webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),

4216

} for name in thumbnail_names for ext in ('webp', 'jpg'))

4217

for thumb in thumbnails:

4218

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

4219

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

4220

self._remove_duplicate_formats(thumbnails)

4221

self._downloader._sort_thumbnails(original_thumbnails)

4222

4223

category = get_first(microformats, 'category') or search_meta('genre')

4224

channel_id = self.ucid_or_none(str_or_none(

4225

get_first(video_details, 'channelId')

4226

or get_first(microformats, 'externalChannelId')

4227

or search_meta('channelId')))

4228

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

4229

4230

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

4231

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

4232

if not duration and live_end_time and live_start_time:

4233

duration = live_end_time - live_start_time

4234

4235

needs_live_processing = self._needs_live_processing(live_status, duration)

4236

4237

def is_bad_format(fmt):

4238

if needs_live_processing and not fmt.get('is_from_start'):

4239

return True

4240

elif (live_status == 'is_live' and needs_live_processing != 'is_live'

4241

and fmt.get('protocol') == 'http_dash_segments'):

4242

return True

4243

4244

for fmt in filter(is_bad_format, formats):

4245

fmt['preference'] = (fmt.get('preference') or -1) - 10

4246

fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')

4247

4248

if needs_live_processing:

4249

self._prepare_live_from_start_formats(

4250

formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

4251

4252

formats.extend(self._extract_storyboard(player_responses, duration))

4253

4254

channel_handle = self.handle_from_url(owner_profile_url)

info = {

'id': video_id,

'title': video_title,

4259

'formats': formats,

4260

'thumbnails': thumbnails,

4261

# The best thumbnail that we are sure exists. Prevents unnecessary

4262

# URL checking if the user doesn't care about getting the best possible thumbnail

4263

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

4264

'description': video_description,

4265

'channel_id': channel_id,

4266

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),

4267

'duration': duration,

4268

'view_count': int_or_none(

4269

get_first((video_details, microformats), (..., 'viewCount'))

4270

or search_meta('interactionCount')),

4271

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

4272

'age_limit': 18 if (

4273

get_first(microformats, 'isFamilySafe') is False

4274

or search_meta('isFamilyFriendly') == 'false'

4275

or search_meta('og:restrictions:age') == '18+') else 0,

4276

'webpage_url': webpage_url,

4277

'categories': [category] if category else None,

4278

'tags': keywords,

4279

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

4280

'live_status': live_status,

4281

'release_timestamp': live_start_time,

4282

'_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats

4283

'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')

}

subtitles = {}

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

4288

if pctr:

4289

def get_lang_code(track):

4290

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

4291

or track.get('languageCode'))

4292

4293

# Converted into dicts to remove duplicates

4294

captions = {

4295

get_lang_code(sub): sub

4296

for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}

4297

translation_languages = {

4298

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

4299

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}

4300

4301

def process_language(container, base_url, lang_code, sub_name, query):

4302

lang_subs = container.setdefault(lang_code, [])

4303

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

# NB: Constructing the full subtitle dictionary is slow

4314

get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (

4315

self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

4316

for lang_code, caption_track in captions.items():

4317

base_url = caption_track.get('baseUrl')

4318

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

4319

if not base_url:

4320

continue

4321

lang_name = self._get_text(caption_track, 'name', max_runs=1)

4322

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

4327

if not caption_track.get('isTranslatable'):

4328

continue

4329

for trans_code, trans_name in translation_languages.items():

4330

if not trans_code:

4331

continue

4332

orig_trans_code = trans_code

4333

if caption_track.get('kind') != 'asr' and trans_code != 'und':

4334

if not get_translated_subs:

4335

continue

4336

trans_code += f'-{lang_code}'

4337

trans_name += format_field(lang_name, None, ' from %s')

4338

if lang_code == f'a-{orig_trans_code}':

4339

# Set audio language based on original subtitles

4340

for f in formats:

4341

if f.get('acodec') != 'none' and not f.get('language'):

4342

f['language'] = orig_trans_code

4343

# Add an "-orig" label to the original language so that it can be distinguished.

4344

# The subs are returned without "-orig" as well for compatibility

4345

process_language(

4346

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

4347

# Setting tlang=lang returns damaged subtitles.

4348

process_language(automatic_captions, base_url, trans_code, trans_name,

4349

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

4350

4351

info['automatic_captions'] = automatic_captions

4352

info['subtitles'] = subtitles

4353

4354

parsed_url = urllib.parse.urlparse(url)

4355

for component in [parsed_url.fragment, parsed_url.query]:

4356

query = urllib.parse.parse_qs(component)

4357

for k, v in query.items():

4358

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

4359

d_k += '_time'

4360

if d_k not in info and k in s_ks:

4361

info[d_k] = parse_duration(query[k][0])

4362

4363

# Youtube Music Auto-generated description

4364

if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):

4365

# XXX: Causes catastrophic backtracking if description has "·"

4366

# E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI

4367

# Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x

4368

# reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2

4369

mobj = re.search(

4370

r'''(?xs)

4371

(?=(?P<track>[^\n·]+))(?P=track)·

4372

(?=(?P<artist>[^\n]+))(?P=artist)\n+

4373

(?=(?P<album>[^\n]+))(?P=album)\n

4374

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

4375

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

4376

(.+?\nArtist\s*:\s*

4377

(?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n

4378

)?.+\nAuto-generated\ by\ YouTube\.\s*$

4379

''', video_description)

4380

if mobj:

4381

release_year = mobj.group('release_year')

4382

release_date = mobj.group('release_date')

4383

if release_date:

4384

release_date = release_date.replace('-', '')

4385

if not release_year:

4386

release_year = release_date[:4]

4387

info.update({

4388

'album': mobj.group('album'.strip()),

4389

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

4390

'track': mobj.group('track').strip(),

4391

'release_date': release_date,

4392

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

4398

if not traverse_obj(initial_data, 'contents'):

4399

self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')

4400

initial_data = None

4401

if not initial_data:

4402

query = {'videoId': video_id}

4403

query.update(self._get_checkok_params())

4404

initial_data = self._extract_response(

4405

item_id=video_id, ep='next', fatal=False,

4406

ytcfg=master_ytcfg, query=query, check_get_keys='contents',

4407

headers=self.generate_api_headers(ytcfg=master_ytcfg),

4408

note='Downloading initial data API JSON')

4409

4410

info['comment_count'] = traverse_obj(initial_data, (

4411

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

4412

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount'

4413

), (

4414

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

4415

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo'

4416

), expected_type=self._get_count, get_all=False)

4417

4418

try: # This will error if there is no livechat

4419

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

4420

except (KeyError, IndexError, TypeError):

4421

pass

4422

else:

4423

info.setdefault('subtitles', {})['live_chat'] = [{

4424

# url is needed to set cookies

4425

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

4426

'video_id': video_id,

4427

'ext': 'json',

4428

'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')

4429

else 'youtube_live_chat_replay'),

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

4435

or self._extract_chapters_from_engagement_panel(initial_data, duration)

4436

or self._extract_chapters_from_description(video_description, duration)

4437

or None)

4438

4439

info['heatmap'] = self._extract_heatmap(initial_data)

4440

4441

contents = traverse_obj(

4442

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

4443

expected_type=list, default=[])

4444

4445

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

4446

if vpir:

4447

stl = vpir.get('superTitleLink')

4448

if stl:

4449

stl = self._get_text(stl)

4450

if try_get(

4451

vpir,

4452

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

4453

info['location'] = stl

4454

else:

4455

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

4456

if mobj:

4457

info.update({

4458

'series': mobj.group(1),

4459

'season_number': int(mobj.group(2)),

4460

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

list) or []):

tbrs = variadic(

traverse_obj(

tlb, ('toggleButtonRenderer', ...),

4469

('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))

4470

for tbr in tbrs:

4471

for getter, regex in [(

4472

lambda x: x['defaultText']['accessibility']['accessibilityData'],

4473

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

4474

lambda x: x['accessibility'],

4475

lambda x: x['accessibilityData']['accessibilityData'],

4476

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

4477

label = (try_get(tbr, getter, dict) or {}).get('label')

4478

if label:

4479

mobj = re.match(regex, label)

4480

if mobj:

4481

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

4482

break

4483

sbr_tooltip = try_get(

4484

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

4485

if sbr_tooltip:

4486

like_count, dislike_count = sbr_tooltip.split(' / ')

4487

info.update({

4488

'like_count': str_to_int(like_count),

4489

'dislike_count': str_to_int(dislike_count),

4490

})

4491

vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))

4492

if vcr:

4493

vc = self._get_count(vcr, 'viewCount')

4494

# Upcoming premieres with waiting count are treated as live here

4495

if vcr.get('isLive'):

4496

info['concurrent_view_count'] = vc

4497

elif info.get('view_count') is None:

4498

info['view_count'] = vc

4499

4500

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

4501

if vsir:

4502

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

4503

info.update({

4504

'channel': self._get_text(vor, 'title'),

4505

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

4506

4507

if not channel_handle:

4508

channel_handle = self.handle_from_url(

4509

traverse_obj(vor, (

4510

('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),

4511

(('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),

4512

{str}), get_all=False))

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

4517

list) or []

4518

multiple_songs = False

4519

for row in rows:

4520

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

4521

multiple_songs = True

4522

break

4523

for row in rows:

4524

mrr = row.get('metadataRowRenderer') or {}

4525

mrr_title = mrr.get('title')

4526

if not mrr_title:

4527

continue

4528

mrr_title = self._get_text(mrr, 'title')

4529

mrr_contents_text = self._get_text(mrr, ('contents', 0))

4530

if mrr_title == 'License':

4531

info['license'] = mrr_contents_text

4532

elif not multiple_songs:

4533

if mrr_title == 'Album':

4534

info['album'] = mrr_contents_text

4535

elif mrr_title == 'Artist':

4536

info['artist'] = mrr_contents_text

4537

elif mrr_title == 'Song':

4538

info['track'] = mrr_contents_text

4539

owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))

4540

if self._has_badge(owner_badges, BadgeType.VERIFIED):

4541

info['channel_is_verified'] = True

4542

4543

info.update({

4544

'uploader': info.get('channel'),

4545

'uploader_id': channel_handle,

4546

'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),

4547

})

4548

# The upload date for scheduled, live and past live streams / premieres in microformats

4549

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

4550

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

4551

upload_date = (

4552

unified_strdate(get_first(microformats, 'uploadDate'))

4553

or unified_strdate(search_meta('uploadDate')))

4554

if not upload_date or (

4555

live_status in ('not_live', None)

4556

and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])

4557

):

4558

upload_date = strftime_or_none(

4559

self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date

4560

info['upload_date'] = upload_date

4561

4562

if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):

4563

# Newly uploaded videos' HLS formats are potentially problematic and need to be checked

4564

upload_datetime = datetime_from_str(upload_date).replace(tzinfo=datetime.timezone.utc)

4565

if upload_datetime >= datetime_from_str('today-2days'):

4566

for fmt in info['formats']:

4567

if fmt.get('protocol') == 'm3u8_native':

4568

fmt['__needs_testing'] = True

4569

4570

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

badges = self._extract_badges(traverse_obj(vpir, 'badges'))

4576

4577

is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)

4578

or get_first(video_details, 'isPrivate', expected_type=bool))

4579

4580

info['availability'] = (

4581

'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)

4582

else self._availability(

4583

is_private=is_private,

4584

needs_premium=(

4585

self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)

4586

or False if initial_data and is_private is not None else None),

4587

needs_subscription=(

4588

self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)

4589

or False if initial_data and is_private is not None else None),

4590

needs_auth=info['age_limit'] >= 18,

4591

is_unlisted=None if is_private is None else (

4592

self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)

4593

or get_first(microformats, 'isUnlisted', expected_type=bool))))

4594

4595

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

4596

4597

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

4603

@staticmethod

4604

def passthrough_smuggled_data(func):

4605

def _smuggle(info, smuggled_data):

4606

if info.get('_type') not in ('url', 'url_transparent'):

4607

return info

4608

if smuggled_data.get('is_music_url'):

4609

parsed_url = urllib.parse.urlparse(info['url'])

4610

if parsed_url.netloc in ('www.youtube.com', 'music.youtube.com'):

4611

smuggled_data.pop('is_music_url')

4612

info['url'] = urllib.parse.urlunparse(parsed_url._replace(netloc='music.youtube.com'))

4613

if smuggled_data:

4614

info['url'] = smuggle_url(info['url'], smuggled_data)

4615

return info

4616

4617

@functools.wraps(func)

4618

def wrapper(self, url):

4619

url, smuggled_data = unsmuggle_url(url, {})

4620

if self.is_music_url(url):

4621

smuggled_data['is_music_url'] = True

4622

info_dict = func(self, url, smuggled_data)

4623

if smuggled_data:

4624

_smuggle(info_dict, smuggled_data)

4625

if info_dict.get('entries'):

4626

info_dict['entries'] = (_smuggle(i, smuggled_data.copy()) for i in info_dict['entries'])

return info_dict

return wrapper

@staticmethod

def _extract_basic_item_renderer(item):

4632

# Modified from _extract_grid_item_renderer

4633

known_basic_renderers = (

4634

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

4635

)

4636

for key, renderer in item.items():

4637

if not isinstance(renderer, dict):

4638

continue

4639

elif key in known_basic_renderers:

4640

return renderer

4641

elif key.startswith('grid') and key.endswith('Renderer'):

4642

return renderer

4643

4644

def _extract_channel_renderer(self, renderer):
    """Build a ``url``-type result describing a channel from its renderer dict."""
    channel_id = self.ucid_or_none(renderer['channelId'])
    title = self._get_text(renderer, 'title')
    channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)

    handle_source = traverse_obj(renderer, (
        'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
                               ('browseEndpoint', 'canonicalBaseUrl')),
        {str}), get_all=False)
    channel_handle = self.handle_from_url(handle_source)
    if not channel_handle:
        # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
        channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))

    ie_key = YoutubeTabIE.ie_key()
    uploader_url = format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None)
    # See above. YouTube sets videoCountText to the subscriber text in search channel renderers.
    # However, in feed/channels this is set correctly to the subscriber count
    follower_count = traverse_obj(
        renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # videoCountText may be the subscriber count, so it is only trusted
    # when subscriberCountText itself parses as a count
    playlist_count = None
    if self._get_count(renderer, 'subscriberCountText') is not None:
        playlist_count = self._get_count(renderer, 'videoCountText')

    description = self._get_text(renderer, 'descriptionSnippet')
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    is_verified = True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None

    return {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': ie_key,
        'channel': title,
        'uploader': title,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': title,
        'uploader_id': channel_handle,
        'uploader_url': uploader_url,
        'channel_follower_count': follower_count,
        'thumbnails': thumbnails,
        'playlist_count': playlist_count,
        'description': description,
        'channel_is_verified': is_verified,
    }

4681

4682

def _grid_entries(self, grid_renderer):

4683

for item in grid_renderer['items']:

4684

if not isinstance(item, dict):

4685

continue

4686

renderer = self._extract_basic_item_renderer(item)

4687

if not isinstance(renderer, dict):

4688

continue

4689

title = self._get_text(renderer, 'title')

4690

4691

# playlist

4692

playlist_id = renderer.get('playlistId')

4693

if playlist_id:

4694

yield self.url_result(

4695

'https://www.youtube.com/playlist?list=%s' % playlist_id,

4696

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

4701

if video_id:

4702

yield self._extract_video(renderer)

4703

continue

4704

# channel

4705

channel_id = renderer.get('channelId')

4706

if channel_id:

4707

yield self._extract_channel_renderer(renderer)

4708

continue

4709

# generic endpoint URL support

4710

ep_url = urljoin('https://www.youtube.com/', try_get(

4711

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

4712

str))

4713

if ep_url:

4714

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

4715

if ie.suitable(ep_url):

4716

yield self.url_result(

4717

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

4718

break

4719

4720

def _music_reponsive_list_entry(self, renderer):
    """Return a music.youtube.com URL result for a music list item, or None.

    Precedence: the item's own video, then its watch-endpoint playlist
    (with the video when one is given), then its browse endpoint.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        title = traverse_obj(renderer, (
            'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
            'text', 'runs', 0, 'text'))
        return self.url_result(f'https://music.youtube.com/watch?v={item_video_id}',
                               ie=YoutubeIE.ie_key(), video_id=item_video_id, title=title)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, watch_endpoint + ('playlistId',))
    if playlist_id:
        video_id = traverse_obj(renderer, watch_endpoint + ('videoId',))
        if video_id:
            target = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                               ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None

4740

4741

def _shelf_entries_from_content(self, shelf_renderer):

4742

content = shelf_renderer.get('content')

4743

if not isinstance(content, dict):

4744

return

4745

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

4746

if renderer:

4747

# TODO: add support for nested playlists so each shelf is processed

4748

# as separate playlist

4749

# TODO: this includes only first N items

4750

yield from self._grid_entries(renderer)

4751

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's endpoint, then the entries of its content.

    When ``skip_channels`` is set and the shelf URL contains ``/channels?``,
    the shelf yields nothing at all.
    """
    endpoint = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

4771

4772

def _playlist_entries(self, video_list_renderer):

4773

for content in video_list_renderer['contents']:

4774

if not isinstance(content, dict):

4775

continue

4776

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

4777

if not isinstance(renderer, dict):

4778

continue

4779

video_id = renderer.get('videoId')

4780

if not video_id:

4781

continue

4782

yield self._extract_video(renderer)

4783

4784

def _rich_entries(self, rich_grid_renderer):
    """Yield at most one entry (a video or a playlist) from a rich item's ``content``."""
    renderer = traverse_obj(
        rich_grid_renderer,
        ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
    elif renderer.get('playlistId'):
        playlist_id = renderer.get('playlistId')
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=self._get_text(renderer, 'title'))

4799

4800

def _video_entry(self, video_renderer):

4801

video_id = video_renderer.get('videoId')

4802

if video_id:

4803

return self._extract_video(video_renderer)

4804

4805

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab URL result for a hashtag tile, or None if it has no tap-command URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4811

4812

def _post_thread_entries(self, post_thread_renderer):
    """Yield entries referenced by a community post.

    In order: the attached video, the attached playlist, then every video
    linked from the post text (except the one already attached).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        # The attached video has already been yielded above
        if linked_video_id == video_id:
            continue
        yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)

4847

4848

def _post_thread_continuation_entries(self, post_thread_continuation):

4849

contents = post_thread_continuation.get('contents')

4850

if not isinstance(contents, list):

4851

return

4852

for content in contents:

4853

renderer = content.get('backstagePostThreadRenderer')

4854

if isinstance(renderer, dict):

4855

yield from self._post_thread_entries(renderer)

4856

continue

4857

renderer = content.get('videoRenderer')

4858

if isinstance(renderer, dict):

4859

yield self._video_entry(renderer)

4860

4861

r''' # unused

4862

def _rich_grid_entries(self, contents):

4863

for content in contents:

4864

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4865

if video_renderer:

4866

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _report_history_entries(self, renderer):
    """Yield a YoutubeIE URL result for every link found in the cells of a report-history table."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, link_path):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)

4877

4878

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield entries from ``parent_renderer['contents']`` and record the next continuation.

    ``continuation_list`` acts as an out-parameter: it is reset to ``[None]``
    on entry and its single slot is filled with the continuation found while
    walking the renderers.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Handlers for the renderers found inside a section; each returns an iterable of entries
    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
        'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)

        if not section:
            # Content that is not wrapped in a section renderer
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            for key, renderer in section_item.items():
                handler = section_handlers.get(key)
                if handler is None:
                    continue
                # Handlers may produce empty (falsy) entries; drop them
                yield from filter(None, handler(renderer))
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of each item is processed
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4931

4932

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield every entry of a tab, following continuations page by page.

    Paging stops when there is no continuation, when a continuation token
    repeats, when a request yields no response, or when a response holds no
    known renderer.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Reads the *current* binding of continuation_list, which is rebound below
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Maps the key of the first continuation item to (handler, key to wrap the items under)
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }

    seen_continuations = set()
    page_num = 0
    while continuation:
        page_num += 1
        continuation_token = continuation.get('continuation')
        if continuation_token is not None and continuation_token in seen_continuations:
            self.write_debug('Detected YouTube feed looping - assuming end of feed.')
            break
        seen_continuations.add(continuation_token)

        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item:
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            if parent_key:
                video_items_renderer = {parent_key: continuation_items}
            else:
                video_items_renderer = continuation_items
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        if not video_items_renderer:
            break

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

5002

for tab_renderer in tabs:

5003

if tab_renderer.get('selected'):

5004

return tab_renderer

5005

if fatal:

5006

raise ExtractorError('Unable to find selected tab')

5007

5008

@staticmethod
def _extract_tab_renderers(response):
    """
    Collect every 'tabRenderer' / 'expandableTabRenderer' dict found under
    contents.twoColumnBrowseResultsRenderer.tabs of the response.

    @param response: browse response / initial data
    """
    return traverse_obj(
        response, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., ('tabRenderer', 'expandableTabRenderer')), expected_type=dict)

5012

5013

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the currently selected tab.

    @param item_id: fallback id passed on to the metadata extraction
    @param ytcfg:   ytcfg used for the account sync id and visitor data
    @param data:    response the tabs were taken from
    @param tabs:    tab renderers; one of them must be selected
    @raises ExtractorError: when no tab is selected
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)

    selected_tab = self._extract_selected_tab(tabs)
    # Suffix the playlist title with the tab's own title/expanded text
    metadata['title'] += format_field(selected_tab, 'title', ' - %s')
    metadata['title'] += format_field(selected_tab, 'expandedText', ' - %s')

    return self.playlist_result(
        self._entries(
            selected_tab, metadata['id'], ytcfg,
            self._extract_account_syncid(ytcfg, data),
            self._extract_visitor_data(data, ytcfg)),
        **metadata)

5026

5027

def _extract_metadata_from_tabs(self, item_id, data):
    """
    Collect channel/playlist level metadata from a tab response.

    @param item_id: id used when the response does not provide a channel id
    @param data:    response
    @returns        info dict with (among others) id, title, channel, uploader,
                    thumbnails, availability, view_count and playlist_count
    """
    info = {'id': item_id}

    # Prefer channel metadata; fall back to playlist metadata when there is none
    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if metadata_renderer:
        channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
                                  ('channelUrl', {self.ucid_from_url}))
        info.update({
            'channel': metadata_renderer.get('title'),
            'channel_id': channel_id,
        })
        if info['channel_id']:
            info['id'] = info['channel_id']
    else:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1
            })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5
            })

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info.update({
        'title': (traverse_obj(metadata_renderer, 'title')
                  or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
                  or info['id']),
        'availability': self._extract_availability(data),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        # NOTE: keywords are split on whitespace, so quoted multi-word keywords end up as separate tags
        'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    channel_handle = (
        traverse_obj(metadata_renderer, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
        or traverse_obj(data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False))

    if channel_handle:
        info.update({
            'uploader_id': channel_handle,
            'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        })

    channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
    if self._has_badge(channel_badges, BadgeType.VERIFIED):
        info['channel_is_verified'] = True

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_unix = self._parse_time_text(
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(last_updated_unix)

    info['view_count'] = self._get_count(playlist_stats, 1)
    if info['view_count'] is None:  # 0 is allowed
        info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')
    if info['view_count'] is None:
        info['view_count'] = self._get_count(data, (
            'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
            'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))

    info['playlist_count'] = self._get_count(playlist_stats, 0)
    if info['playlist_count'] is None:  # 0 is allowed
        info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))

    # No channel id so far: take channel details from the playlist owner instead
    if not info.get('channel_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
            'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl')))
        })

    info.update({
        'uploader': info['channel'],
        'channel_url': format_field(info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None),
        'uploader_url': format_field(info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None),
    })

    return info

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of a playlist panel, requesting further pages from the
    'next' endpoint until a page contributes no new videos.

    @param playlist:    playlist panel renderer of the first page
    @param playlist_id: id sent as 'playlistId' in the continuation requests
    @param data:        initial response (used for account sync id / visitor data)
    @param ytcfg:       ytcfg used to build the API headers
    """
    last_id = response = None
    for page_num in itertools.count(1):
        # A continuation response may not contain a playlist panel at all
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The endpoint may be missing; every field read from it below has a fallback
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

5179

5180

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Extract a playlist embedded in a watch page.

    Returns a url_result pointing at the playlist's own page when one is
    advertised and usable, otherwise a playlist_result built from the
    inline playlist panel.

    @param item_id:  fallback playlist id
    @param url:      URL being extracted (base for resolving the playlist URL)
    @param data:     response
    @param playlist: playlist panel renderer
    @param ytcfg:    ytcfg passed on to the inline extraction
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    playlist_url = urljoin(url, try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str))

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    if playlist_url and playlist_url != url and not is_known_unviewable:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    return self.playlist_result(
        self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
        playlist_id=playlist_id, playlist_title=title)

5202

5203

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    def privacy_matches(header_value, icon_value):
        # The header privacy field wins over the dropdown icon; None = neither is present
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return None

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    # `badge or x if cond else y` groups as `(badge or x) if cond else y`, which
    # dropped the badge whenever the header privacy was absent. The badge check is
    # therefore kept outside the header/icon fallback chain.
    is_private = (
        self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
        or privacy_matches('PRIVATE', 'PRIVACY_PRIVATE'))

    unlisted_setting = privacy_matches('UNLISTED', 'PRIVACY_UNLISTED')
    if unlisted_setting is None:
        unlisted_setting = microformats_is_unlisted
    is_unlisted = (
        self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
        or unlisted_setting)

    return self._availability(
        is_private=is_private,
        is_unlisted=is_unlisted,
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` entry among the items of
    sidebar.playlistSidebarRenderer, or None when there is none.

    @param data:          response
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the entry must have to be accepted
    """
    sidebar_renderer = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for item in sidebar_renderer:
        renderer = try_get(item, lambda x: x[info_renderer], expected_type)
        if renderer:
            return renderer

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Returns None without making a request when `data` carries neither
    playlist metadata nor a playlist header; otherwise returns the result of
    the (non-fatal) browse request.
    """
    is_playlist = bool(traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer')))
    if not is_playlist:
        return
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': 'wgYCCAA=',
        'browseId': f'VL{item_id}'
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')

5272

5273

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the youtube:tab 'skip' extractor argument (computed once per instance)"""
    return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())

5276

5277

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, with retries.

    @param url:     page to download
    @param item_id: id used for reporting
    @param fatal:   passed to the retry manager, the initial data extraction
                    and the error reporting
    @returns        (webpage, data); both stay None if the download never succeeds
    """
    webpage, data = None, None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # Retry network errors, except HTTP 403/429 responses
                if not isinstance(e.cause, HTTPError) or e.cause.status not in (403, 429):
                    retry.error = e
                    continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing unless `ytcfg` is empty and the session is authenticated.
    In that case an ExtractorError is raised when `fatal` is true and
    'authcheck' is not in the youtube:tab 'skip' extractor argument;
    otherwise only a warning is reported.
    """
    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
        if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
            raise ExtractorError(
                f'{msg}. If you are not downloading private content, or '
                'your cookies are only for the first account and channel,'
                ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(msg, only_once=True)

5316

5317

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the tab data for `url`, from the webpage when possible and from
    the API otherwise.

    @param url:            URL to extract
    @param item_id:        id used for reporting
    @param ytcfg:          known ytcfg; extracted from the webpage when empty
    @param fatal:          raise (instead of warn) on a home page redirect and
                           on failures of the API fallback
    @param webpage_fatal:  whether a failing webpage download is fatal
    @param default_client: client used for the API fallback
    @returns               (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        selected_tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {}
        if (url != 'https://www.youtube.com/feed/recommended'
                and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
                and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])):
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)
    # Webpage skipped or unusable: resolve the URL through the API instead
    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

5335

5336

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through 'navigation/resolve_url' and fetch the data of the
    endpoint it maps to ('browse' for a browseEndpoint, 'next' for a
    watchEndpoint).

    @param fatal: raise ExtractorError when the URL resolves to neither
                  endpoint; otherwise a warning is reported and None returned
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
    endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
    for ep_key, ep in endpoints.items():
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if params:
            return self._extract_response(
                item_id=item_id, query=params, ep=ep, headers=headers,
                ytcfg=ytcfg, fatal=fatal, default_client=default_client,
                check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))
    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

5353

5354

# Default 'params' value for search requests; a falsy value leaves the field out of the query
_SEARCH_PARAMS = None

5355

5356

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, following continuations page by page.

    @param query:          search query
    @param params:         'params' value for the request; defaults to
                           self._SEARCH_PARAMS and is omitted when falsy
    @param default_client: client used for the ytcfg download and API requests
    """
    data = {'query': query}
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    if params:
        data['params'] = params

    # Locations of the result list in the response; passed to traverse_obj as alternative paths
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {}
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-element list that _extract_entries receives so it can hand back the next continuation
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            break

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

5391

IE_DESC = 'YouTube Tabs'

5392

_VALID_URL = r'''(?x:

5393

https?://

5394

(?!consent\.)(?:\w+\.)?

5395

(?:

5396

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

5401

(?P<not_channel>

5402

feed/|hashtag/|

5403

(?:playlist|watch)\?.*?\blist=

5404

)|

5405

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

5410

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

5411

}

5412

IE_NAME = 'youtube:tab'

5413

5414

_TESTS = [{

5415

'note': 'playlists, multipage',

5416

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

5417

'playlist_mincount': 94,

5418

'info_dict': {

5419

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5420

'title': 'Igor Kleiner - Playlists',

5421

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

5422

'uploader': 'Igor Kleiner',

5423

'uploader_id': '@IgorDataScience',

5424

'uploader_url': 'https://www.youtube.com/@IgorDataScience',

5425

'channel': 'Igor Kleiner',

5426

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5427

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

5428

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5429

'channel_follower_count': int

5430

},

5431

}, {

5432

'note': 'playlists, multipage, different order',

5433

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

5434

'playlist_mincount': 94,

5435

'info_dict': {

5436

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5437

'title': 'Igor Kleiner - Playlists',

5438

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

5439

'uploader': 'Igor Kleiner',

5440

'uploader_id': '@IgorDataScience',

5441

'uploader_url': 'https://www.youtube.com/@IgorDataScience',

5442

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

5443

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5444

'channel': 'Igor Kleiner',

5445

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5446

'channel_follower_count': int

5447

},

5448

}, {

5449

'note': 'playlists, series',

5450

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

5451

'playlist_mincount': 5,

5452

'info_dict': {

5453

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5454

'title': '3Blue1Brown - Playlists',

5455

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5456

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5457

'channel': '3Blue1Brown',

5458

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5459

'uploader_id': '@3blue1brown',

5460

'uploader_url': 'https://www.youtube.com/@3blue1brown',

5461

'uploader': '3Blue1Brown',

5462

'tags': ['Mathematics'],

5463

'channel_follower_count': int,

5464

'channel_is_verified': True,

5465

},

5466

}, {

5467

'note': 'playlists, singlepage',

5468

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

5469

'playlist_mincount': 4,

5470

'info_dict': {

5471

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5472

'title': 'ThirstForScience - Playlists',

5473

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

5474

'uploader': 'ThirstForScience',

5475

'uploader_url': 'https://www.youtube.com/@ThirstForScience',

5476

'uploader_id': '@ThirstForScience',

5477

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5478

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5479

'tags': 'count:13',

5480

'channel': 'ThirstForScience',

5481

'channel_follower_count': int

5482

}

5483

}, {

5484

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

5485

'only_matching': True,

5486

}, {

5487

'note': 'basic, single video playlist',

5488

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5489

'info_dict': {

5490

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5491

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

5496

'channel': 'Sergey M.',

5497

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5498

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5499

'availability': 'public',

5500

'uploader': 'Sergey M.',

5501

'uploader_url': 'https://www.youtube.com/@sergeym.6173',

5502

'uploader_id': '@sergeym.6173',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

5507

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5508

'info_dict': {

5509

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5510

'title': 'youtube-dl empty playlist',

5511

'tags': [],

5512

'channel': 'Sergey M.',

5513

'description': '',

5514

'modified_date': '20160902',

5515

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5516

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5517

'availability': 'public',

5518

'uploader_url': 'https://www.youtube.com/@sergeym.6173',

5519

'uploader_id': '@sergeym.6173',

5520

'uploader': 'Sergey M.',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

5526

'info_dict': {

5527

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5528

'title': 'lex will - Home',

5529

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5530

'uploader': 'lex will',

5531

'uploader_id': '@lexwill718',

5532

'channel': 'lex will',

5533

'tags': ['bible', 'history', 'prophesy'],

5534

'uploader_url': 'https://www.youtube.com/@lexwill718',

5535

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5536

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5537

'channel_follower_count': int

5538

},

5539

'playlist_mincount': 2,

5540

}, {

5541

'note': 'Videos tab',

5542

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

5543

'info_dict': {

5544

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5545

'title': 'lex will - Videos',

5546

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5547

'uploader': 'lex will',

5548

'uploader_id': '@lexwill718',

5549

'tags': ['bible', 'history', 'prophesy'],

5550

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5551

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5552

'uploader_url': 'https://www.youtube.com/@lexwill718',

5553

'channel': 'lex will',

5554

'channel_follower_count': int

5555

},

5556

'playlist_mincount': 975,

5557

}, {

5558

'note': 'Videos tab, sorted by popular',

5559

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

5560

'info_dict': {

5561

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5562

'title': 'lex will - Videos',

5563

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5564

'uploader': 'lex will',

5565

'uploader_id': '@lexwill718',

5566

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5567

'uploader_url': 'https://www.youtube.com/@lexwill718',

5568

'channel': 'lex will',

5569

'tags': ['bible', 'history', 'prophesy'],

5570

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5571

'channel_follower_count': int

5572

},

5573

'playlist_mincount': 199,

5574

}, {

5575

'note': 'Playlists tab',

5576

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

5577

'info_dict': {

5578

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5579

'title': 'lex will - Playlists',

5580

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5581

'uploader': 'lex will',

5582

'uploader_id': '@lexwill718',

5583

'uploader_url': 'https://www.youtube.com/@lexwill718',

5584

'channel': 'lex will',

5585

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5586

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5587

'tags': ['bible', 'history', 'prophesy'],

5588

'channel_follower_count': int

5589

},

5590

'playlist_mincount': 17,

5591

}, {

5592

'note': 'Community tab',

5593

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

5594

'info_dict': {

5595

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5596

'title': 'lex will - Community',

5597

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5598

'channel': 'lex will',

5599

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5600

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5601

'tags': ['bible', 'history', 'prophesy'],

5602

'channel_follower_count': int,

5603

'uploader_url': 'https://www.youtube.com/@lexwill718',

5604

'uploader_id': '@lexwill718',

5605

'uploader': 'lex will',

5606

},

5607

'playlist_mincount': 18,

5608

}, {

5609

'note': 'Channels tab',

5610

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

5611

'info_dict': {

5612

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5613

'title': 'lex will - Channels',

5614

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5615

'channel': 'lex will',

5616

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5617

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5618

'tags': ['bible', 'history', 'prophesy'],

5619

'channel_follower_count': int,

5620

'uploader_url': 'https://www.youtube.com/@lexwill718',

5621

'uploader_id': '@lexwill718',

5622

'uploader': 'lex will',

5623

},

5624

'playlist_mincount': 12,

5625

}, {

5626

'note': 'Search tab',

5627

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

5628

'playlist_mincount': 40,

5629

'info_dict': {

5630

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5631

'title': '3Blue1Brown - Search - linear algebra',

5632

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5633

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5634

'tags': ['Mathematics'],

5635

'channel': '3Blue1Brown',

5636

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5637

'channel_follower_count': int,

5638

'uploader_url': 'https://www.youtube.com/@3blue1brown',

5639

'uploader_id': '@3blue1brown',

5640

'uploader': '3Blue1Brown',

5641

'channel_is_verified': True,

5642

},

5643

}, {

5644

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5645

'only_matching': True,

5646

}, {

5647

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5648

'only_matching': True,

5649

}, {

5650

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5651

'only_matching': True,

5652

}, {

5653

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

5654

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5655

'info_dict': {

5656

'title': '29C3: Not my department',

5657

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5658

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

5659

'tags': [],

5660

'view_count': int,

5661

'modified_date': '20150605',

5662

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5663

'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',

5664

'channel': 'Christiaan008',

5665

'availability': 'public',

5666

'uploader_id': '@ChRiStIaAn008',

5667

'uploader': 'Christiaan008',

5668

'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',

5669

},

5670

'playlist_count': 96,

5671

}, {

5672

'note': 'Large playlist',

5673

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

5674

'info_dict': {

5675

'title': 'Uploads from Cauchemar',

5676

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

5677

'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',

5678

'tags': [],

5679

'modified_date': r're:\d{8}',

5680

'channel': 'Cauchemar',

5681

'view_count': int,

5682

'description': '',

5683

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5684

'availability': 'public',

5685

'uploader_id': '@Cauchemar89',

5686

'uploader': 'Cauchemar',

5687

'uploader_url': 'https://www.youtube.com/@Cauchemar89',

5688

},

5689

'playlist_mincount': 1123,

5690

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5691

}, {

5692

'note': 'even larger playlist, 8832 videos',

5693

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

5694

'only_matching': True,

5695

}, {

5696

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

5697

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

5698

'info_dict': {

5699

'title': 'Uploads from Interstellar Movie',

5700

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

5701

'tags': [],

5702

'view_count': int,

5703

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5704

'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',

5705

'channel': 'Interstellar Movie',

5706

'description': '',

5707

'modified_date': r're:\d{8}',

5708

'availability': 'public',

5709

'uploader_id': '@InterstellarMovie',

5710

'uploader': 'Interstellar Movie',

5711

'uploader_url': 'https://www.youtube.com/@InterstellarMovie',

5712

},

5713

'playlist_mincount': 21,

5714

}, {

5715

'note': 'Playlist with "show unavailable videos" button',

5716

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

5717

'info_dict': {

5718

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

5719

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

5720

'view_count': int,

5721

'channel': 'Phim Siêu Nhân Nhật Bản',

5722

'tags': [],

5723

'description': '',

5724

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5725

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5726

'modified_date': r're:\d{8}',

5727

'availability': 'public',

5728

'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',

5729

'uploader_id': '@phimsieunhannhatban',

5730

'uploader': 'Phim Siêu Nhân Nhật Bản',

5731

},

5732

'playlist_mincount': 200,

5733

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5734

}, {

5735

'note': 'Playlist with unavailable videos in page 7',

5736

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

5737

'info_dict': {

5738

'title': 'Uploads from BlankTV',

5739

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

5740

'channel': 'BlankTV',

5741

'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',

5742

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5743

'view_count': int,

5744

'tags': [],

5745

'modified_date': r're:\d{8}',

5746

'description': '',

5747

'availability': 'public',

5748

'uploader_id': '@blanktv',

5749

'uploader': 'BlankTV',

5750

'uploader_url': 'https://www.youtube.com/@blanktv',

5751

},

5752

'playlist_mincount': 1000,

5753

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5754

}, {

5755

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

5756

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5757

'info_dict': {

5758

'title': 'Data Analysis with Dr Mike Pound',

5759

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5760

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

5761

'tags': [],

5762

'view_count': int,

5763

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5764

'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',

5765

'channel': 'Computerphile',

5766

'availability': 'public',

5767

'modified_date': '20190712',

5768

'uploader_id': '@Computerphile',

5769

'uploader': 'Computerphile',

5770

'uploader_url': 'https://www.youtube.com/@Computerphile',

5771

},

5772

'playlist_mincount': 11,

5773

}, {

5774

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5775

'only_matching': True,

5776

}, {

5777

'note': 'Playlist URL that does not actually serve a playlist',

5778

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

5783

'upload_date': '20150526',

5784

'license': 'Standard YouTube License',

5785

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

5786

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5793

},

5794

'skip': 'This video is not available.',

5795

'add_ie': [YoutubeIE.ie_key()],

5796

}, {

5797

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5798

'only_matching': True,

5799

}, {

5800

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5801

'only_matching': True,

5802

}, {

5803

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5804

'info_dict': {

5805

'id': 'hGkQjiJLjWQ', # This will keep changing

5806

'ext': 'mp4',

5807

'title': str,

5808

'upload_date': r're:\d{8}',

5809

'description': str,

5810

'categories': ['News & Politics'],

5811

'tags': list,

5812

'like_count': int,

5813

'release_timestamp': int,

5814

'channel': 'Sky News',

5815

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5816

'age_limit': 0,

5817

'view_count': int,

5818

'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',

5819

'playable_in_embed': True,

5820

'release_date': r're:\d+',

5821

'availability': 'public',

5822

'live_status': 'is_live',

5823

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5824

'channel_follower_count': int,

5825

'concurrent_view_count': int,

5826

'uploader_url': 'https://www.youtube.com/@SkyNews',

5827

'uploader_id': '@SkyNews',

5828

'uploader': 'Sky News',

5829

'channel_is_verified': True,

5830

},

5831

'params': {

5832

'skip_download': True,

5833

},

5834

'expected_warnings': ['Ignoring subtitle tracks found in '],

5835

}, {

5836

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5841

'upload_date': '20150715',

5842

'license': 'Standard YouTube License',

5843

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5844

'categories': ['News & Politics'],

5845

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5850

},

5851

'only_matching': True,

5852

}, {

5853

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5854

'only_matching': True,

5855

}, {

5856

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5857

'only_matching': True,

5858

}, {

5859

'note': 'A channel that is not live. Should raise error',

5860

'url': 'https://www.youtube.com/user/numberphile/live',

5861

'only_matching': True,

5862

}, {

5863

'url': 'https://www.youtube.com/feed/trending',

5864

'only_matching': True,

5865

}, {

5866

'url': 'https://www.youtube.com/feed/library',

5867

'only_matching': True,

5868

}, {

5869

'url': 'https://www.youtube.com/feed/history',

5870

'only_matching': True,

5871

}, {

5872

'url': 'https://www.youtube.com/feed/subscriptions',

5873

'only_matching': True,

5874

}, {

5875

'url': 'https://www.youtube.com/feed/watch_later',

5876

'only_matching': True,

5877

}, {

5878

'note': 'Recommended - redirects to home page.',

5879

'url': 'https://www.youtube.com/feed/recommended',

5880

'only_matching': True,

5881

}, {

5882

'note': 'inline playlist with not always working continuations',

5883

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5884

'only_matching': True,

5885

}, {

5886

'url': 'https://www.youtube.com/course',

5887

'only_matching': True,

5888

}, {

5889

'url': 'https://www.youtube.com/zsecurity',

5890

'only_matching': True,

5891

}, {

5892

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5893

'only_matching': True,

5894

}, {

5895

'url': 'https://www.youtube.com/TheYoungTurks/live',

5896

'only_matching': True,

5897

}, {

5898

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 300, # not consistent but should be over 300

5905

}, {

5906

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5907

'only_matching': True,

5908

}, {

5909

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5910

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5911

'only_matching': True

5912

}, {

5913

'note': '/browse/ should redirect to /channel/',

5914

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5915

'only_matching': True

5916

}, {

5917

'note': 'VLPL, should redirect to playlist?list=PL...',

5918

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5919

'info_dict': {

5920

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5921

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5922

'title': 'NCS : All Releases 💿',

5923

'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',

5924

'modified_date': r're:\d{8}',

5925

'view_count': int,

5926

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5927

'tags': [],

5928

'channel': 'NoCopyrightSounds',

5929

'availability': 'public',

5930

'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',

5931

'uploader': 'NoCopyrightSounds',

5932

'uploader_id': '@NoCopyrightSounds',

5933

},

5934

'playlist_mincount': 166,

5935

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],

5936

}, {

5937

# TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos

5938

'note': 'Topic, should redirect to playlist?list=UU...',

5939

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5940

'info_dict': {

5941

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5942

'title': 'Uploads from Royalty Free Music - Topic',

5943

'tags': [],

5944

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5945

'channel': 'Royalty Free Music - Topic',

5946

'view_count': int,

5947

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5948

'modified_date': r're:\d{8}',

5949

'description': '',

5950

'availability': 'public',

5951

'uploader': 'Royalty Free Music - Topic',

5952

},

5953

'playlist_mincount': 101,

5954

'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],

5955

}, {

5956

# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)

5957

# Treat as a general feed

5958

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5959

'info_dict': {

5960

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5961

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5962

'tags': [],

5963

},

5964

'playlist_mincount': 9,

5965

}, {

5966

'note': 'Youtube music Album',

5967

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5968

'info_dict': {

5969

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5970

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5975

'modified_date': r're:\d{8}',

5976

},

5977

'playlist_count': 50,

5978

'expected_warnings': ['YouTube Music is not directly supported'],

5979

}, {

5980

'note': 'unlisted single video playlist',

5981

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5982

'info_dict': {

5983

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5984

'title': 'yt-dlp unlisted playlist test',

5985

'availability': 'unlisted',

5986

'tags': [],

5987

'modified_date': '20220418',

5988

'channel': 'colethedj',

5989

'view_count': int,

5990

'description': '',

5991

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5992

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5993

'uploader_url': 'https://www.youtube.com/@colethedj1894',

5994

'uploader_id': '@colethedj1894',

5995

'uploader': 'colethedj',

},

'playlist': [{

'info_dict': {

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

'id': 'BaW_jenozKc',

'_type': 'url',

'ie_key': 'Youtube',

'duration': 10,

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

6005

'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

6006

'view_count': int,

6007

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',

6008

'channel': 'Philipp Hagemeister',

6009

'uploader_id': '@PhilippHagemeister',

6010

'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',

6011

'uploader': 'Philipp Hagemeister',

}

}],

'playlist_count': 1,

'params': {'extract_flat': True},

6016

}, {

6017

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

6018

'url': 'https://www.youtube.com/feed/recommended',

6019

'info_dict': {

6020

'id': 'recommended',

6021

'title': 'recommended',

6022

'tags': [],

6023

},

6024

'playlist_mincount': 50,

6025

'params': {

6026

'skip_download': True,

6027

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

6028

},

6029

}, {

6030

'note': 'API Fallback: /videos tab, sorted by oldest first',

6031

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

6032

'info_dict': {

6033

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

6034

'title': 'Cody\'sLab - Videos',

6035

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

6036

'channel': 'Cody\'sLab',

6037

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

6038

'tags': [],

6039

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

6040

'channel_follower_count': int

6041

},

6042

'playlist_mincount': 650,

6043

'params': {

6044

'skip_download': True,

6045

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

6046

},

6047

'skip': 'Query for sorting no longer works',

6048

}, {

6049

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

6050

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

6051

'info_dict': {

6052

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

6053

'title': 'Uploads from Royalty Free Music - Topic',

6054

'modified_date': r're:\d{8}',

6055

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

6056

'description': '',

6057

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

6058

'tags': [],

6059

'channel': 'Royalty Free Music - Topic',

6060

'view_count': int,

6061

'availability': 'public',

6062

'uploader': 'Royalty Free Music - Topic',

6063

},

6064

'playlist_mincount': 101,

6065

'params': {

6066

'skip_download': True,

6067

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

6068

},

6069

'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],

6070

}, {

6071

'note': 'non-standard redirect to regional channel',

6072

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

6073

'only_matching': True

6074

}, {

6075

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

6076

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

6077

'info_dict': {

6078

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

6079

'modified_date': '20220407',

6080

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

6081

'tags': [],

6082

'availability': 'unlisted',

6083

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

6084

'channel': 'pukkandan',

6085

'description': 'Test for collaborative playlist',

6086

'title': 'yt-dlp test - collaborative playlist',

6087

'view_count': int,

6088

'uploader_url': 'https://www.youtube.com/@pukkandan',

6089

'uploader_id': '@pukkandan',

6090

'uploader': 'pukkandan',

6091

},

6092

'playlist_mincount': 2

6093

}, {

6094

'note': 'translated tab name',

6095

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',

6096

'info_dict': {

6097

'id': 'UCiu-3thuViMebBjw_5nWYrA',

6098

'tags': [],

6099

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

6100

'description': 'test description',

6101

'title': 'cole-dlp-test-acc - 再生リスト',

6102

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6103

'channel': 'cole-dlp-test-acc',

6104

'uploader_url': 'https://www.youtube.com/@coletdjnz',

6105

'uploader_id': '@coletdjnz',

6106

'uploader': 'cole-dlp-test-acc',

6107

},

6108

'playlist_mincount': 1,

6109

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

6110

'expected_warnings': ['Preferring "ja"'],

6111

}, {

6112

# XXX: this should really check flat playlist entries, but the test suite doesn't support that

6113

'note': 'preferred lang set with playlist with translated video titles',

6114

'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

6115

'info_dict': {

6116

'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

6117

'tags': [],

6118

'view_count': int,

6119

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

6120

'channel': 'cole-dlp-test-acc',

6121

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6122

'description': 'test',

6123

'title': 'dlp test playlist',

6124

'availability': 'public',

6125

'uploader_url': 'https://www.youtube.com/@coletdjnz',

6126

'uploader_id': '@coletdjnz',

6127

'uploader': 'cole-dlp-test-acc',

6128

},

6129

'playlist_mincount': 1,

6130

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

6131

'expected_warnings': ['Preferring "ja"'],

6132

}, {

6133

# shorts audio pivot for 2GtVksBMYFM.

6134

'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',

6135

'info_dict': {

6136

'id': 'sfv_audio_pivot',

6137

'title': 'sfv_audio_pivot',

6138

'tags': [],

6139

},

6140

'playlist_mincount': 50,

6141

6142

}, {

6143

# Channel with a real live tab (not to be mistaken with streams tab)

6144

# Do not treat like it should redirect to live stream

6145

'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',

6146

'info_dict': {

6147

'id': 'UCEH7P7kyJIkS_gJf93VYbmg',

6148

'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',

6149

'tags': [],

6150

},

6151

'playlist_mincount': 20,

6152

}, {

6153

# Tab name is not the same as tab id

6154

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',

6155

'info_dict': {

6156

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

6157

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',

6158

'tags': [],

6159

},

6160

'playlist_mincount': 8,

6161

}, {

6162

# Home tab id is literally home. Not to get mistaken with featured

6163

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',

6164

'info_dict': {

6165

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

6166

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',

6167

'tags': [],

6168

},

6169

'playlist_mincount': 8,

6170

}, {

6171

# Should get three playlists for videos, shorts and streams tabs

6172

'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

6173

'info_dict': {

6174

'id': 'UCK9V2B22uJYu3N7eR_BT9QA',

6175

'title': 'Polka Ch. 尾丸ポルカ',

6176

'channel_follower_count': int,

6177

'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',

6178

'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

6179

'description': 'md5:e56b74b5bb7e9c701522162e9abfb822',

6180

'channel': 'Polka Ch. 尾丸ポルカ',

6181

'tags': 'count:35',

6182

'uploader_url': 'https://www.youtube.com/@OmaruPolka',

6183

'uploader': 'Polka Ch. 尾丸ポルカ',

6184

'uploader_id': '@OmaruPolka',

},

'playlist_count': 3,

}, {

# Shorts tab with channel with handle

6189

# TODO: fix channel description

6190

'url': 'https://www.youtube.com/@NotJustBikes/shorts',

6191

'info_dict': {

6192

'id': 'UC0intLFzLaudFG-xAvUEO-A',

6193

'title': 'Not Just Bikes - Shorts',

6194

'tags': 'count:12',

6195

'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',

6196

'description': 'md5:26bc55af26855a608a5cf89dfa595c8d',

6197

'channel_follower_count': int,

6198

'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',

6199

'channel': 'Not Just Bikes',

6200

'uploader_url': 'https://www.youtube.com/@NotJustBikes',

6201

'uploader': 'Not Just Bikes',

6202

'uploader_id': '@NotJustBikes',

6203

},

6204

'playlist_mincount': 10,

6205

}, {

6206

# Streams tab

6207

'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',

6208

'info_dict': {

6209

'id': 'UC3eYAvjCVwNHgkaGbXX3sig',

6210

'title': '中村悠一 - Live',

6211

'tags': 'count:7',

6212

'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',

6213

'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',

6214

'channel': '中村悠一',

6215

'channel_follower_count': int,

6216

'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',

6217

'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',

6218

'uploader_id': '@Yuichi-Nakamura',

6219

'uploader': '中村悠一',

6220

},

6221

'playlist_mincount': 60,

6222

}, {

6223

# Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.

6224

# See test_youtube_lists

6225

'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',

6226

'only_matching': True,

6227

}, {

6228

# No uploads and no UCID given. Should fail with no uploads error

6229

# See test_youtube_lists

6230

'url': 'https://www.youtube.com/news',

6231

'only_matching': True

6232

}, {

6233

# No videos tab but has a shorts tab

6234

'url': 'https://www.youtube.com/c/TKFShorts',

6235

'info_dict': {

6236

'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

6237

'title': 'Shorts Break - Shorts',

6238

'tags': 'count:48',

6239

'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

6240

'channel': 'Shorts Break',

6241

'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',

6242

'channel_follower_count': int,

6243

'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',

6244

'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',

6245

'uploader': 'Shorts Break',

6246

'uploader_id': '@ShortsBreak_Official',

6247

},

6248

'playlist_mincount': 30,

6249

}, {

6250

# Trending Now Tab. tab id is empty

6251

'url': 'https://www.youtube.com/feed/trending',

6252

'info_dict': {

6253

'id': 'trending',

6254

'title': 'trending - Now',

6255

'tags': [],

6256

},

6257

'playlist_mincount': 30,

6258

}, {

6259

# Trending Gaming Tab. tab id is empty

6260

'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',

6261

'info_dict': {

6262

'id': 'trending',

6263

'title': 'trending - Gaming',

6264

'tags': [],

6265

},

6266

'playlist_mincount': 30,

6267

}, {

6268

# Shorts url result in shorts tab

6269

# TODO: Fix channel id extraction

6270

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',

6271

'info_dict': {

6272

'id': 'UCiu-3thuViMebBjw_5nWYrA',

6273

'title': 'cole-dlp-test-acc - Shorts',

6274

'channel': 'cole-dlp-test-acc',

6275

'description': 'test description',

6276

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6277

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

6278

'tags': [],

6279

'uploader_url': 'https://www.youtube.com/@coletdjnz',

6280

'uploader_id': '@coletdjnz',

6281

'uploader': 'cole-dlp-test-acc',

},

'playlist': [{

'info_dict': {

# Channel data is not currently available for short renderers (as of 2023-03-01)

6286

'_type': 'url',

6287

'ie_key': 'Youtube',

6288

'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',

6289

'id': 'sSM9J5YH_60',

6290

'title': 'SHORT short',

'view_count': int,

'thumbnails': list,

}

}],

'params': {'extract_flat': True},

6296

}, {

6297

# Live video status should be extracted

6298

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',

6299

'info_dict': {

6300

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

6301

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live

'tags': []

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'Youtube',

'url': 'startswith:https://www.youtube.com/watch?v=',

6309

'id': str,

6310

'title': str,

6311

'live_status': 'is_live',

6312

'channel_id': str,

6313

'channel_url': str,

6314

'concurrent_view_count': int,

'channel': str,

'uploader': str,

'uploader_url': str,

'uploader_id': str,

'channel_is_verified': bool, # this will keep changing

6320

}

6321

}],

6322

'params': {'extract_flat': True, 'playlist_items': '1'},

6323

'playlist_mincount': 1

6324

}, {

6325

# Channel renderer metadata. Contains number of videos on the channel

6326

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',

6327

'info_dict': {

6328

'id': 'UCiu-3thuViMebBjw_5nWYrA',

6329

'title': 'cole-dlp-test-acc - Channels',

6330

'channel': 'cole-dlp-test-acc',

6331

'description': 'test description',

6332

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6333

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

6334

'tags': [],

6335

'uploader_url': 'https://www.youtube.com/@coletdjnz',

6336

'uploader_id': '@coletdjnz',

6337

'uploader': 'cole-dlp-test-acc',

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'YoutubeTab',

6343

'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6344

'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6345

'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6346

'title': 'PewDiePie',

6347

'channel': 'PewDiePie',

6348

'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6349

'thumbnails': list,

6350

'channel_follower_count': int,

6351

'playlist_count': int,

6352

'uploader': 'PewDiePie',

6353

'uploader_url': 'https://www.youtube.com/@PewDiePie',

6354

'uploader_id': '@PewDiePie',

6355

'channel_is_verified': True,

6356

}

6357

}],

6358

'params': {'extract_flat': True},

6359

}, {

6360

'url': 'https://www.youtube.com/@3blue1brown/about',

6361

'info_dict': {

6362

'id': 'UCYO_jab_esuFRV4b17AJtAw',

6363

'tags': ['Mathematics'],

6364

'title': '3Blue1Brown - About',

6365

'channel_follower_count': int,

6366

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

6367

'channel': '3Blue1Brown',

6368

'view_count': int,

6369

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

6370

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

6371

'uploader_url': 'https://www.youtube.com/@3blue1brown',

6372

'uploader_id': '@3blue1brown',

6373

'uploader': '3Blue1Brown',

6374

'channel_is_verified': True,

},

'playlist_count': 0,

}, {

# Podcasts tab, with rich entry playlistRenderers

6379

'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',

6380

'info_dict': {

6381

'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',

6382

'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',

6383

'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',

6384

'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',

6385

'title': '99 Percent Invisible - Podcasts',

6386

'uploader': '99 Percent Invisible',

6387

'channel_follower_count': int,

6388

'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',

6389

'tags': [],

6390

'channel': '99 Percent Invisible',

6391

'uploader_id': '@99percentinvisiblepodcast',

},

'playlist_count': 1,

}, {

# Releases tab, with rich entry playlistRenderers (same as Podcasts tab)

6396

'url': 'https://www.youtube.com/@AHimitsu/releases',

6397

'info_dict': {

6398

'id': 'UCgFwu-j5-xNJml2FtTrrB3A',

6399

'channel': 'A Himitsu',

6400

'uploader_url': 'https://www.youtube.com/@AHimitsu',

6401

'title': 'A Himitsu - Releases',

6402

'uploader_id': '@AHimitsu',

6403

'uploader': 'A Himitsu',

6404

'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',

6405

'tags': 'count:16',

6406

'description': 'I make music',

6407

'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',

6408

'channel_follower_count': int,

6409

'channel_is_verified': True,

6410

},

6411

'playlist_mincount': 10,

6412

}, {

6413

# Playlist with only shorts, shown as reel renderers

6414

# FIXME: future: YouTube currently doesn't give continuation for this,

6415

# may do in future.

6416

'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',

6417

'info_dict': {

6418

'id': 'UUxqPAgubo4coVn9Lx1FuKcg',

6419

'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',

6420

'view_count': int,

6421

'uploader_id': '@BangyShorts',

6422

'description': '',

6423

'uploader_url': 'https://www.youtube.com/@BangyShorts',

6424

'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',

6425

'channel': 'Bangy Shorts',

6426

'uploader': 'Bangy Shorts',

6427

'tags': [],

6428

'availability': 'public',

6429

'modified_date': '20230626',

6430

'title': 'Uploads from Bangy Shorts',

6431

},

6432

'playlist_mincount': 100,

6433

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    Any URL accepted by ``YoutubeIE`` is declined here; for every other URL
    the decision is delegated to the inherited ``suitable`` implementation.
    """
    return False if YoutubeIE.suitable(url) else super().suitable(url)

6439

6440

# Splits a URL into three named parts on top of the groups of _VALID_URL:
#   pre  - the portion matched by _VALID_URL itself
#   tab  - an optional '/<segment>' right after it; the conditional group
#          (?(not_channel)|...) only tries to match it when the
#          `not_channel` group did NOT participate in the match
#   post - everything that remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')

6441

6442

def _get_url_mobj(self, url):
    """Match *url* against ``self._URL_RE`` and return its named groups.

    Returns a dict mapping every named group of the pattern to the matched
    text; groups that did not participate in the match map to ``''``
    instead of ``None``, so callers can slice or concatenate the values
    without checking for ``None`` first.
    """
    # groupdict(default) fills in unmatched groups directly, which avoids
    # rewriting the dict while iterating over its own items.
    return self._URL_RE.match(url).groupdict('')

6446

6447

def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a ``(tab_id, tab_name)`` tuple for a tab renderer dict.

    ``tab_name`` is the lower-cased ``title`` of *tab* (``''`` when absent).

    ``tab_id`` is taken, in order of preference, from:
      1. the ``tab`` path segment of the tab's endpoint URL (resolved
         against *base_url*), without its leading slash;
      2. the ``tabIdentifier`` string of *tab*;
      3. the tab name, with ``'home'`` mapped to ``'featured'`` and
         ``'live'`` mapped to ``'streams'``.
    """
    tab_name = (tab.get('title') or '').lower()
    tab_url = urljoin(base_url, traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))

    tab_id = (tab_url and self._get_url_mobj(tab_url)['tab'][1:]
              or traverse_obj(tab, 'tabIdentifier', expected_type=str))
    if tab_id:
        return {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    # Always return a tuple (possibly with an empty id) so that callers can
    # index the result unconditionally.
    return {
        'home': 'featured',
        'live': 'streams',
    }.get(tab_name, tab_name), tab_name

6468

6469

def _has_tab(self, tabs, tab_id):
    """Return True if any tab in *tabs* resolves to the id *tab_id*.

    Each tab is resolved with ``_extract_tab_id_and_name`` and only the id
    (first element of the returned pair) is compared. An empty *tabs*
    yields False.
    """
    return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)

6471

6472

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a tab/playlist page, or hand the URL over to a more suitable extractor.

    Order of operations:
      1. YouTube Music channel URLs are rewritten to an equivalent playlist/channel URL.
      2. `watch` URLs without a video id are redirected to the playlist (or rejected).
      3. The page data is downloaded and conditional redirects are followed.
      4. For channels, the selected tab is compared with the requested one; extra
         tabs (streams, shorts) are queued when no specific tab was requested.
      5. The result is built from tabs, an inline playlist, or a single video.

    Raises ExtractorError when nothing recognizable is found, and UserNotLive when
    a `/live` tab was requested but a different tab was selected.
    """
    item_id = self._match_id(url)
    parsed_url = urllib.parse.urlparse(url)
    url = urllib.parse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    allow_channel_redirect = 'no-youtube-channel-redirect' not in compat_opts

    url_parts = self._get_url_mobj(url)
    pre, tab, post = url_parts['pre'], url_parts['tab'], url_parts['post']
    is_channel = not url_parts['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif id_prefix == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            album_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if not album_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(album_url, YoutubeTabIE)
        elif url_parts['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    original_tab_id = tab[1:]
    display_id = f'{item_id}{tab}'
    if is_channel and not tab and allow_channel_redirect:
        url = f'{pre}/videos{post}'
    if smuggled_data.get('is_music_url'):
        self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')

    # Handle both video/playlist URLs
    query = parse_qs(url)
    video_id = traverse_obj(query, ('v', 0))
    playlist_id = traverse_obj(query, ('list', 0))
    if not video_id and url_parts['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data,
        ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
         'commandMetadata', 'webCommandMetadata', 'url'),
        get_all=False)
    if redirect_url and allow_channel_redirect:
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    tabs = self._extract_tab_renderers(data)
    extra_tabs = []
    if is_channel and tabs and allow_channel_redirect:
        selected_tab = self._extract_selected_tab(tabs)
        # NB: Name may be translated
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    raise ExtractorError('This channel has no uploads', expected=True)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                fallback_id = f'UU{item_id[2:]}'
                fallback_url = f'https://www.youtube.com/playlist?list={fallback_id}'
                try:
                    data, ytcfg = self._extract_data(
                        fallback_url, fallback_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    raise ExtractorError('This channel has no uploads', expected=True)
                # Only reached when the playlist page was downloaded successfully
                item_id, url = fallback_id, fallback_url
                self.to_screen(
                    f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {fallback_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    entries = []
    tabs = self._extract_tab_renderers(data)
    if tabs:
        main_entry = self._extract_from_tabs(item_id, ytcfg, data, tabs)
        main_entry.update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
        entries.append(main_entry)
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(tab_url, YoutubeTabIE) for tab_url in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Prefer the video id reported by the page over the one from the query string
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')

6623

6624

6625

class YoutubePlaylistIE(InfoExtractor):
    """Matches bare playlist ids and `list=` URLs and forwards them to `YoutubeTabIE`."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': '@WickmanVT',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/@WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': '@milan5503',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/@milan5503',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': '@music_king',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@music_king',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept the URL only if `YoutubeTabIE` does not and it carries no `v` parameter."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): kept as a function-local import on purpose — presumably so the
        # method body is self-contained if its source is reused elsewhere; confirm before moving
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rebuild the URL as a `/playlist` URL and delegate to `YoutubeTabIE`."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; a bare id has none
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6737

6738

6739

class YoutubeYtBeIE(InfoExtractor):
    """Rewrites `youtu.be/<id>?list=<playlist>` URLs to a `watch` URL for `YoutubeTabIE`."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': r're:^https?://.*\.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent `watch?v=…&list=…` URL and delegate to `YoutubeTabIE`."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6788

6789

6790

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Maps `embed/live_stream?channel=<id>` URLs to the channel's `/live` URL."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to `YoutubeTabIE` with the channel's `/live` URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

6803

6804

6805

class YoutubeYtUserIE(InfoExtractor):
    """Expands the `ytuser:<name>` shorthand to a `/user/<name>` URL."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to `YoutubeTabIE` with the user's page URL."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)

6817

6818

6819

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the `:ytfav` keyword family to the `LL` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to `YoutubeTabIE` with the `LL` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

6836

6837

6838

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Lists the entries of the notification menu as a playlist (`:ytnotif` keyword)."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield an entry for each notification in `response`.

        `continuation_list[0]` is used as an out-parameter: it is reset to None and
        then set to the last `continuationItemRenderer` seen, if any.
        """
        items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Turn a notification renderer into a `url` result dict.

        Returns None when the notification has no video id and no channel id plus
        post id can be extracted from it either.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The sent time is only parsed when the `approximate_date` extractor arg is set
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'uploader': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is returned."""
        continuation_list = [None]
        response = None
        for page_num in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            # Visitor data is taken from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page_num}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return all notifications as a playlist with id and title 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)

6928

6929

6930

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the `ytsearch` prefix."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the `ytsearchdate` prefix."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles `/results` and `/search` URLs, passing the `sp` parameter through."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                # No longer available for search as it is set to the handle.
                # 'playlist_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
                'uploader_id': '@kurzgesagt',
                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                'uploader': 'Kurzgesagt – In a Nutshell',
                'channel_is_verified': True,
                'channel_follower_count': int,
            }
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist whose id and title are the query."""
        url_query = parse_qs(url)
        # `_VALID_URL` requires one of the two parameters to be present
        search_terms = (url_query.get('search_query') or url_query.get('q'))[0]
        search_params = url_query.get('sp', (None,))[0]
        results = self._search_results(search_terms, search_params)
        return self.playlist_result(results, search_terms, search_terms)

7029

7030

7031

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles `music.youtube.com/search` URLs; the section comes from `sp` or the URL fragment."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name -> `sp` search parameter value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the search results as a playlist titled `<query>[ - <section>]`."""
        url_query = parse_qs(url)
        search_terms = (url_query.get('search_query') or url_query.get('q'))[0]
        params = url_query.get('sp', (None,))[0]
        if params:
            # Reverse lookup of the section name; unknown values are shown as-is
            section = params
            for section_name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = section_name
                    break
        else:
            # No `sp`: take the section name from the first URL fragment, if any
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(search_terms, section, delim=' - ')
        results = self._search_results(search_terms, params, default_client='web_music')
        return self.playlist_result(results, title, title)

7082

7083

7084

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    A subclass selects its feed by overriding _FEED_NAME.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        # Derived from the feed name, so subclasses do not need to set it themselves
        return f'youtube:{cls._FEED_NAME}'

    def _real_initialize(self):
        """Run the login-required check of `YoutubeBaseInfoExtractor` on this instance."""
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to `YoutubeTabIE` with the `/feed/<_FEED_NAME>` URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

7102

7103

7104

class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the `:ytwatchlater` keyword to the `WL` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to `YoutubeTabIE` with the `WL` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

7116

7117

7118

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `recommended` feed; also matches the bare youtube.com URL."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `subscriptions` feed."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `history` feed."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Maps `/source/<video id>/shorts` URLs to the `sfv_audio_pivot` feed URL."""

    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the sfv_audio_pivot browse params for this video id:
        the id is inserted three times into a fixed byte template, which is then
        base64-encoded and URL-quoted
        """
        encoded_id = video_id.encode()
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (encoded_id, encoded_id, encoded_id)
        b64_params = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(b64_params)

    def _real_extract(self, url):
        """Delegate to `YoutubeTabIE` with the audio pivot feed URL."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)

class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches `watch`/`attribution_link` URLs that lost their video id.

    The pattern only matches URLs ending right after one of a few known
    parameters, which is what is left when an unquoted `&` splits the URL.
    Extraction always fails with an explanatory, expected error.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The example commands name this project's executable (yt-dlp) so that the
        # suggested invocation can be run as written
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Resolves `/clip/<id>` URLs to the base video plus the clip's start/end section."""

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': '@ScottTheWoz',
            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
            'chapters': 'count:20',
            'comment_count': int,
            'heatmap': 'count:100',
        }
    }]

    def _real_extract(self, url):
        """Return a `url_transparent` result pointing at the clip's base video.

        The result carries the clip id and the clip bounds (`section_start`,
        `section_end`) in seconds.

        Raises ExtractorError if the video id or the clip bounds cannot be found
        in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # The traversal yields None when the path is absent; fail with a clear
        # extractor error instead of a TypeError/KeyError on the lookups below
        start_ms = int_or_none(traverse_obj(clip_data, 'startTimeMs'))
        end_ms = int_or_none(traverse_obj(clip_data, 'endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start/end times')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }

class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    # Handles consent.youtube.com interstitial URLs by handing the URL found
    # in their 'continue' query parameter on for further extraction.
    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': '@sana_natori',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'https://www.youtube.com/@sana_natori',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
            'channel_is_verified': True,
            'heatmap': 'count:100',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """Return a url_result for the target of the 'continue' parameter.

        Raises an expected ExtractorError when the parameter is absent or
        its last value is not accepted by url_or_none.
        """
        # When the parameter is repeated, the last occurrence is used.
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)

7337

7338

7339

class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose 'v' value is only 1-10 characters long and
    # ends the URL, and reports them as truncated instead of extracting.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short ID and URL."""
        truncated_id = self._match_id(url)
        message = f'Incomplete YouTube ID {truncated_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)