jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import collections
	4	import copy
	5	import datetime
	6	import enum
	7	import hashlib
	8	import itertools
	9	import json
	10	import math
	11	import os.path
	12	import random
	13	import re
	14	import sys
	15	import threading
	16	import time
	17	import traceback
	18	import urllib.error
	19	import urllib.parse
	20
	21	from .common import InfoExtractor, SearchInfoExtractor
	22	from .openload import PhantomJSwrapper
	23	from ..compat import functools
	24	from ..jsinterp import JSInterpreter
	25	from ..utils import (
	26	NO_DEFAULT,
	27	ExtractorError,
	28	LazyList,
	29	UserNotLive,
	30	bug_reports_message,
	31	classproperty,
	32	clean_html,
	33	datetime_from_str,
	34	dict_get,
	35	filter_dict,
	36	float_or_none,
	37	format_field,
	38	get_first,
	39	int_or_none,
	40	is_html,
	41	join_nonempty,
	42	js_to_json,
	43	mimetype2ext,
	44	network_exceptions,
	45	orderedSet,
	46	parse_codecs,
	47	parse_count,
	48	parse_duration,
	49	parse_iso8601,
	50	parse_qs,
	51	qualities,
	52	remove_start,
	53	smuggle_url,
	54	str_or_none,
	55	str_to_int,
	56	strftime_or_none,
	57	traverse_obj,
	58	try_get,
	59	unescapeHTML,
	60	unified_strdate,
	61	unified_timestamp,
	62	unsmuggle_url,
	63	update_url_query,
	64	url_or_none,
	65	urljoin,
	66	variadic,
	67	)
	68
	69	# any clients starting with _ cannot be explicitly requested by the user
	70	INNERTUBE_CLIENTS = {
	71	'web': {
	72	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	73	'INNERTUBE_CONTEXT': {
	74	'client': {
	75	'clientName': 'WEB',
	76	'clientVersion': '2.20220801.00.00',
	77	}
	78	},
	79	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	80	},
	81	'web_embedded': {
	82	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	83	'INNERTUBE_CONTEXT': {
	84	'client': {
	85	'clientName': 'WEB_EMBEDDED_PLAYER',
	86	'clientVersion': '1.20220731.00.00',
	87	},
	88	},
	89	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	90	},
	91	'web_music': {
	92	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	93	'INNERTUBE_HOST': 'music.youtube.com',
	94	'INNERTUBE_CONTEXT': {
	95	'client': {
	96	'clientName': 'WEB_REMIX',
	97	'clientVersion': '1.20220727.01.00',
	98	}
	99	},
	100	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	101	},
	102	'web_creator': {
	103	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	104	'INNERTUBE_CONTEXT': {
	105	'client': {
	106	'clientName': 'WEB_CREATOR',
	107	'clientVersion': '1.20220726.00.00',
	108	}
	109	},
	110	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	111	},
	112	'android': {
	113	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	114	'INNERTUBE_CONTEXT': {
	115	'client': {
	116	'clientName': 'ANDROID',
	117	'clientVersion': '17.31.35',
	118	'androidSdkVersion': 30,
	119	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	120	}
	121	},
	122	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	123	'REQUIRE_JS_PLAYER': False
	124	},
	125	'android_embedded': {
	126	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	127	'INNERTUBE_CONTEXT': {
	128	'client': {
	129	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	130	'clientVersion': '17.31.35',
	131	'androidSdkVersion': 30,
	132	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	133	},
	134	},
	135	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	136	'REQUIRE_JS_PLAYER': False
	137	},
	138	'android_music': {
	139	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	140	'INNERTUBE_CONTEXT': {
	141	'client': {
	142	'clientName': 'ANDROID_MUSIC',
	143	'clientVersion': '5.16.51',
	144	'androidSdkVersion': 30,
	145	'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
	146	}
	147	},
	148	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	149	'REQUIRE_JS_PLAYER': False
	150	},
	151	'android_creator': {
	152	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	153	'INNERTUBE_CONTEXT': {
	154	'client': {
	155	'clientName': 'ANDROID_CREATOR',
	156	'clientVersion': '22.30.100',
	157	'androidSdkVersion': 30,
	158	'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
	159	},
	160	},
	161	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	162	'REQUIRE_JS_PLAYER': False
	163	},
	164	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	165	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	166	'ios': {
	167	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	168	'INNERTUBE_CONTEXT': {
	169	'client': {
	170	'clientName': 'IOS',
	171	'clientVersion': '17.33.2',
	172	'deviceModel': 'iPhone14,3',
	173	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	174	}
	175	},
	176	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	177	'REQUIRE_JS_PLAYER': False
	178	},
	179	'ios_embedded': {
	180	'INNERTUBE_CONTEXT': {
	181	'client': {
	182	'clientName': 'IOS_MESSAGES_EXTENSION',
	183	'clientVersion': '17.33.2',
	184	'deviceModel': 'iPhone14,3',
	185	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	186	},
	187	},
	188	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	189	'REQUIRE_JS_PLAYER': False
	190	},
	191	'ios_music': {
	192	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	193	'INNERTUBE_CONTEXT': {
	194	'client': {
	195	'clientName': 'IOS_MUSIC',
	196	'clientVersion': '5.21',
	197	'deviceModel': 'iPhone14,3',
	198	'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	199	},
	200	},
	201	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	202	'REQUIRE_JS_PLAYER': False
	203	},
	204	'ios_creator': {
	205	'INNERTUBE_CONTEXT': {
	206	'client': {
	207	'clientName': 'IOS_CREATOR',
	208	'clientVersion': '22.33.101',
	209	'deviceModel': 'iPhone14,3',
	210	'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	211	},
	212	},
	213	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	214	'REQUIRE_JS_PLAYER': False
	215	},
	216	# mweb has 'ultralow' formats
	217	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	218	'mweb': {
	219	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	220	'INNERTUBE_CONTEXT': {
	221	'client': {
	222	'clientName': 'MWEB',
	223	'clientVersion': '2.20220801.00.00',
	224	}
	225	},
	226	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	227	},
	228	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	229	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	230	'tv_embedded': {
	231	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	232	'INNERTUBE_CONTEXT': {
	233	'client': {
	234	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	235	'clientVersion': '2.0',
	236	},
	237	},
	238	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	239	},
	240	}
	241
	242
	243	def _split_innertube_client(client_name):
	244	variant, *base = client_name.rsplit('.', 1)
	245	if base:
	246	return variant, base[0], variant
	247	base, *variant = client_name.split('_', 1)
	248	return client_name, base, variant[0] if variant else None
	249
	250
	251	def build_innertube_clients():
	252	THIRD_PARTY = {
	253	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	254	}
	255	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	256	priority = qualities(BASE_CLIENTS[::-1])
	257
	258	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	259	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	260	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	261	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	262	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	263
	264	_, base_client, variant = _split_innertube_client(client)
	265	ytcfg['priority'] = 10 * priority(base_client)
	266
	267	if not variant:
	268	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	269	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	270	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	271	embedscreen['priority'] -= 3
	272	elif variant == 'embedded':
	273	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	274	ytcfg['priority'] -= 2
	275	else:
	276	ytcfg['priority'] -= 3
	277
	278
	279	build_innertube_clients()
	280
	281
	282	class BadgeType(enum.Enum):
	283	AVAILABILITY_UNLISTED = enum.auto()
	284	AVAILABILITY_PRIVATE = enum.auto()
	285	AVAILABILITY_PUBLIC = enum.auto()
	286	AVAILABILITY_PREMIUM = enum.auto()
	287	AVAILABILITY_SUBSCRIPTION = enum.auto()
	288	LIVE_NOW = enum.auto()
	289
	290
	291	class YoutubeBaseInfoExtractor(InfoExtractor):
	292	"""Provide base functions for Youtube extractors"""
	293
	294	_RESERVED_NAMES = (
	295	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|live\|watch_popup\|clip\|'
	296	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	297	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|source\|'
	298	r'storefront\|oops\|index\|account\|t/terms\|about\|upload\|signin\|logout')
	299
	300	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	301
	302	# _NETRC_MACHINE = 'youtube'
	303
	304	# If True it will raise an error if no login info is provided
	305	_LOGIN_REQUIRED = False
	306
	307	_INVIDIOUS_SITES = (
	308	# invidious-redirect websites
	309	r'(?:www\.)?redirect\.invidious\.io',
	310	r'(?:(?:www\|dev)\.)?invidio\.us',
	311	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	312	r'(?:www\.)?invidious\.pussthecat\.org',
	313	r'(?:www\.)?invidious\.zee\.li',
	314	r'(?:www\.)?invidious\.ethibox\.fr',
	315	r'(?:www\.)?iv\.ggtyler\.dev',
	316	r'(?:www\.)?inv\.vern\.i2p',
	317	r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
	318	r'(?:www\.)?inv\.riverside\.rocks',
	319	r'(?:www\.)?invidious\.silur\.me',
	320	r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
	321	r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
	322	r'(?:www\.)?invidious\.slipfox\.xyz',
	323	r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
	324	r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
	325	r'(?:www\.)?invidious\.tiekoetter\.com',
	326	r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
	327	r'(?:www\.)?invidious\.nerdvpn\.de',
	328	r'(?:www\.)?invidious\.weblibre\.org',
	329	r'(?:www\.)?inv\.odyssey346\.dev',
	330	r'(?:www\.)?invidious\.dhusch\.de',
	331	r'(?:www\.)?iv\.melmac\.space',
	332	r'(?:www\.)?watch\.thekitty\.zone',
	333	r'(?:www\.)?invidious\.privacydev\.net',
	334	r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
	335	r'(?:www\.)?invidious\.drivet\.xyz',
	336	r'(?:www\.)?vid\.priv\.au',
	337	r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
	338	r'(?:www\.)?inv\.vern\.cc',
	339	r'(?:www\.)?invidious\.esmailelbob\.xyz',
	340	r'(?:www\.)?invidious\.sethforprivacy\.com',
	341	r'(?:www\.)?yt\.oelrichsgarcia\.de',
	342	r'(?:www\.)?yt\.artemislena\.eu',
	343	r'(?:www\.)?invidious\.flokinet\.to',
	344	r'(?:www\.)?invidious\.baczek\.me',
	345	r'(?:www\.)?y\.com\.sb',
	346	r'(?:www\.)?invidious\.epicsite\.xyz',
	347	r'(?:www\.)?invidious\.lidarshield\.cloud',
	348	r'(?:www\.)?yt\.funami\.tech',
	349	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	350	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	351	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	352	# youtube-dl invidious instances list
	353	r'(?:(?:www\|no)\.)?invidiou\.sh',
	354	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	355	r'(?:www\.)?invidious\.kabi\.tk',
	356	r'(?:www\.)?invidious\.mastodon\.host',
	357	r'(?:www\.)?invidious\.zapashcanon\.fr',
	358	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	359	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	360	r'(?:www\.)?invidious\.himiko\.cloud',
	361	r'(?:www\.)?invidious\.reallyancient\.tech',
	362	r'(?:www\.)?invidious\.tube',
	363	r'(?:www\.)?invidiou\.site',
	364	r'(?:www\.)?invidious\.site',
	365	r'(?:www\.)?invidious\.xyz',
	366	r'(?:www\.)?invidious\.nixnet\.xyz',
	367	r'(?:www\.)?invidious\.048596\.xyz',
	368	r'(?:www\.)?invidious\.drycat\.fr',
	369	r'(?:www\.)?inv\.skyn3t\.in',
	370	r'(?:www\.)?tube\.poal\.co',
	371	r'(?:www\.)?tube\.connect\.cafe',
	372	r'(?:www\.)?vid\.wxzm\.sx',
	373	r'(?:www\.)?vid\.mint\.lgbt',
	374	r'(?:www\.)?vid\.puffyan\.us',
	375	r'(?:www\.)?yewtu\.be',
	376	r'(?:www\.)?yt\.elukerio\.org',
	377	r'(?:www\.)?yt\.lelux\.fi',
	378	r'(?:www\.)?invidious\.ggc-project\.de',
	379	r'(?:www\.)?yt\.maisputain\.ovh',
	380	r'(?:www\.)?ytprivate\.com',
	381	r'(?:www\.)?invidious\.13ad\.de',
	382	r'(?:www\.)?invidious\.toot\.koeln',
	383	r'(?:www\.)?invidious\.fdn\.fr',
	384	r'(?:www\.)?watch\.nettohikari\.com',
	385	r'(?:www\.)?invidious\.namazso\.eu',
	386	r'(?:www\.)?invidious\.silkky\.cloud',
	387	r'(?:www\.)?invidious\.exonip\.de',
	388	r'(?:www\.)?invidious\.riverside\.rocks',
	389	r'(?:www\.)?invidious\.blamefran\.net',
	390	r'(?:www\.)?invidious\.moomoo\.de',
	391	r'(?:www\.)?ytb\.trom\.tf',
	392	r'(?:www\.)?yt\.cyberhost\.uk',
	393	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	394	r'(?:www\.)?qklhadlycap4cnod\.onion',
	395	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	396	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	397	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	398	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	399	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	400	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	401	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	402	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	403	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	404	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	405	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	406	r'(?:www\.)?piped\.kavin\.rocks',
	407	r'(?:www\.)?piped\.tokhmi\.xyz',
	408	r'(?:www\.)?piped\.syncpundit\.io',
	409	r'(?:www\.)?piped\.mha\.fi',
	410	r'(?:www\.)?watch\.whatever\.social',
	411	r'(?:www\.)?piped\.garudalinux\.org',
	412	r'(?:www\.)?piped\.rivo\.lol',
	413	r'(?:www\.)?piped-libre\.kavin\.rocks',
	414	r'(?:www\.)?yt\.jae\.fi',
	415	r'(?:www\.)?piped\.mint\.lgbt',
	416	r'(?:www\.)?il\.ax',
	417	r'(?:www\.)?piped\.esmailelbob\.xyz',
	418	r'(?:www\.)?piped\.projectsegfau\.lt',
	419	r'(?:www\.)?piped\.privacydev\.net',
	420	r'(?:www\.)?piped\.palveluntarjoaja\.eu',
	421	r'(?:www\.)?piped\.smnz\.de',
	422	r'(?:www\.)?piped\.adminforge\.de',
	423	r'(?:www\.)?watch\.whatevertinfoil\.de',
	424	r'(?:www\.)?piped\.qdi\.fi',
	425	r'(?:www\.)?piped\.video',
	426	r'(?:www\.)?piped\.aeong\.one',
	427	r'(?:www\.)?piped\.moomoo\.me',
	428	r'(?:www\.)?piped\.chauvet\.pro',
	429	r'(?:www\.)?watch\.leptons\.xyz',
	430	r'(?:www\.)?pd\.vern\.cc',
	431	r'(?:www\.)?piped\.hostux\.net',
	432	r'(?:www\.)?piped\.lunar\.icu',
	433	# Hyperpipe instances from https://hyperpipe.codeberg.page/
	434	r'(?:www\.)?hyperpipe\.surge\.sh',
	435	r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
	436	r'(?:www\.)?listen\.whatever\.social',
	437	r'(?:www\.)?music\.adminforge\.de',
	438	)
	439
	440	# extracted from account/account_menu ep
	441	# XXX: These are the supported YouTube UI and API languages,
	442	# which is slightly different from languages supported for translation in YouTube studio
	443	_SUPPORTED_LANG_CODES = [
	444	'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
	445	'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
	446	'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
	447	'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
	448	'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
	449	'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
	450	]
	451
	452	_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
	453
	454	@functools.cached_property
	455	def _preferred_lang(self):
	456	"""
	457	Returns a language code supported by YouTube for the user preferred language.
	458	Returns None if no preferred language set.
	459	"""
	460	preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
	461	if not preferred_lang:
	462	return
	463	if preferred_lang not in self._SUPPORTED_LANG_CODES:
	464	raise ExtractorError(
	465	f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
	466	expected=True)
	467	elif preferred_lang != 'en':
	468	self.report_warning(
	469	f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
	470	return preferred_lang
	471
	472	def _initialize_consent(self):
	473	cookies = self._get_cookies('https://www.youtube.com/')
	474	if cookies.get('__Secure-3PSID'):
	475	return
	476	consent_id = None
	477	consent = cookies.get('CONSENT')
	478	if consent:
	479	if 'YES' in consent.value:
	480	return
	481	consent_id = self._search_regex(
	482	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	483	if not consent_id:
	484	consent_id = random.randint(100, 999)
	485	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	486
	487	def _initialize_pref(self):
	488	cookies = self._get_cookies('https://www.youtube.com/')
	489	pref_cookie = cookies.get('PREF')
	490	pref = {}
	491	if pref_cookie:
	492	try:
	493	pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
	494	except ValueError:
	495	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	496	pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
	497	self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
	498
	499	def _real_initialize(self):
	500	self._initialize_pref()

1

import base64

import calendar

import collections

import copy

import datetime

import enum

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

22

from .openload import PhantomJSwrapper

23

from ..compat import functools

24

from ..jsinterp import JSInterpreter

25

from ..utils import (

NO_DEFAULT,

ExtractorError,

LazyList,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

filter_dict,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

70

INNERTUBE_CLIENTS = {

71

'web': {

72

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

73

'INNERTUBE_CONTEXT': {

74

'client': {

75

'clientName': 'WEB',

76

'clientVersion': '2.20220801.00.00',

77

}

78

},

79

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

80

},

81

'web_embedded': {

82

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

83

'INNERTUBE_CONTEXT': {

84

'client': {

85

'clientName': 'WEB_EMBEDDED_PLAYER',

86

'clientVersion': '1.20220731.00.00',

87

},

88

},

89

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

90

},

91

'web_music': {

92

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

93

'INNERTUBE_HOST': 'music.youtube.com',

94

'INNERTUBE_CONTEXT': {

95

'client': {

96

'clientName': 'WEB_REMIX',

97

'clientVersion': '1.20220727.01.00',

98

}

99

},

100

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

101

},

102

'web_creator': {

103

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

104

'INNERTUBE_CONTEXT': {

105

'client': {

106

'clientName': 'WEB_CREATOR',

107

'clientVersion': '1.20220726.00.00',

108

}

109

},

110

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

111

},

112

'android': {

113

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

114

'INNERTUBE_CONTEXT': {

115

'client': {

116

'clientName': 'ANDROID',

117

'clientVersion': '17.31.35',

118

'androidSdkVersion': 30,

119

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

120

}

121

},

122

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

123

'REQUIRE_JS_PLAYER': False

124

},

125

'android_embedded': {

126

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

127

'INNERTUBE_CONTEXT': {

128

'client': {

129

'clientName': 'ANDROID_EMBEDDED_PLAYER',

130

'clientVersion': '17.31.35',

131

'androidSdkVersion': 30,

132

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

133

},

134

},

135

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

136

'REQUIRE_JS_PLAYER': False

137

},

138

'android_music': {

139

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

140

'INNERTUBE_CONTEXT': {

141

'client': {

142

'clientName': 'ANDROID_MUSIC',

143

'clientVersion': '5.16.51',

144

'androidSdkVersion': 30,

145

'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'

146

}

147

},

148

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

149

'REQUIRE_JS_PLAYER': False

150

},

151

'android_creator': {

152

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

153

'INNERTUBE_CONTEXT': {

154

'client': {

155

'clientName': 'ANDROID_CREATOR',

156

'clientVersion': '22.30.100',

157

'androidSdkVersion': 30,

158

'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'

159

},

160

},

161

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

162

'REQUIRE_JS_PLAYER': False

163

},

164

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

165

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

166

'ios': {

167

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

168

'INNERTUBE_CONTEXT': {

169

'client': {

170

'clientName': 'IOS',

171

'clientVersion': '17.33.2',

172

'deviceModel': 'iPhone14,3',

173

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

174

}

175

},

176

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

177

'REQUIRE_JS_PLAYER': False

178

},

179

'ios_embedded': {

180

'INNERTUBE_CONTEXT': {

181

'client': {

182

'clientName': 'IOS_MESSAGES_EXTENSION',

183

'clientVersion': '17.33.2',

184

'deviceModel': 'iPhone14,3',

185

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

186

},

187

},

188

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

189

'REQUIRE_JS_PLAYER': False

190

},

191

'ios_music': {

192

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

193

'INNERTUBE_CONTEXT': {

194

'client': {

195

'clientName': 'IOS_MUSIC',

196

'clientVersion': '5.21',

197

'deviceModel': 'iPhone14,3',

198

'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

199

},

200

},

201

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

202

'REQUIRE_JS_PLAYER': False

203

},

204

'ios_creator': {

205

'INNERTUBE_CONTEXT': {

206

'client': {

207

'clientName': 'IOS_CREATOR',

208

'clientVersion': '22.33.101',

209

'deviceModel': 'iPhone14,3',

210

'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

211

},

212

},

213

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

214

'REQUIRE_JS_PLAYER': False

215

},

216

# mweb has 'ultralow' formats

217

# See: https://github.com/yt-dlp/yt-dlp/pull/557

218

'mweb': {

219

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

220

'INNERTUBE_CONTEXT': {

221

'client': {

222

'clientName': 'MWEB',

223

'clientVersion': '2.20220801.00.00',

224

}

225

},

226

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

227

},

228

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

229

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

230

'tv_embedded': {

231

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

232

'INNERTUBE_CONTEXT': {

233

'client': {

234

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

235

'clientVersion': '2.0',

236

},

237

},

238

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

244

variant, *base = client_name.rsplit('.', 1)

245

if base:

246

return variant, base[0], variant

247

base, *variant = client_name.split('_', 1)

248

return client_name, base, variant[0] if variant else None

249

250

251

def build_innertube_clients():

252

THIRD_PARTY = {

253

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

254

}

255

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

256

priority = qualities(BASE_CLIENTS[::-1])

257

258

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

259

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

260

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

261

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

262

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

263

264

_, base_client, variant = _split_innertube_client(client)

265

ytcfg['priority'] = 10 * priority(base_client)

266

267

if not variant:

268

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

269

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

270

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

271

embedscreen['priority'] -= 3

272

elif variant == 'embedded':

273

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

274

ytcfg['priority'] -= 2

275

else:

276

ytcfg['priority'] -= 3

277

278

279

build_innertube_clients()

280

281

282

class BadgeType(enum.Enum):

283

AVAILABILITY_UNLISTED = enum.auto()

284

AVAILABILITY_PRIVATE = enum.auto()

285

AVAILABILITY_PUBLIC = enum.auto()

286

AVAILABILITY_PREMIUM = enum.auto()

287

AVAILABILITY_SUBSCRIPTION = enum.auto()

288

LIVE_NOW = enum.auto()

289

290

291

class YoutubeBaseInfoExtractor(InfoExtractor):

292

"""Provide base functions for Youtube extractors"""

_RESERVED_NAMES = (

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

301

302

# _NETRC_MACHINE = 'youtube'

303

304

# If True it will raise an error if no login info is provided

305

_LOGIN_REQUIRED = False

306

307

_INVIDIOUS_SITES = (

308

# invidious-redirect websites

309

r'(?:www\.)?redirect\.invidious\.io',

310

r'(?:(?:www|dev)\.)?invidio\.us',

311

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

312

r'(?:www\.)?invidious\.pussthecat\.org',

313

r'(?:www\.)?invidious\.zee\.li',

314

r'(?:www\.)?invidious\.ethibox\.fr',

315

r'(?:www\.)?iv\.ggtyler\.dev',

316

r'(?:www\.)?inv\.vern\.i2p',

317

r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',

318

r'(?:www\.)?inv\.riverside\.rocks',

319

r'(?:www\.)?invidious\.silur\.me',

320

r'(?:www\.)?inv\.bp\.projectsegfau\.lt',

321

r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',

322

r'(?:www\.)?invidious\.slipfox\.xyz',

323

r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',

324

r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',

325

r'(?:www\.)?invidious\.tiekoetter\.com',

326

r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',

327

r'(?:www\.)?invidious\.nerdvpn\.de',

328

r'(?:www\.)?invidious\.weblibre\.org',

329

r'(?:www\.)?inv\.odyssey346\.dev',

330

r'(?:www\.)?invidious\.dhusch\.de',

331

r'(?:www\.)?iv\.melmac\.space',

332

r'(?:www\.)?watch\.thekitty\.zone',

333

r'(?:www\.)?invidious\.privacydev\.net',

334

r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',

335

r'(?:www\.)?invidious\.drivet\.xyz',

336

r'(?:www\.)?vid\.priv\.au',

337

r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',

338

r'(?:www\.)?inv\.vern\.cc',

339

r'(?:www\.)?invidious\.esmailelbob\.xyz',

340

r'(?:www\.)?invidious\.sethforprivacy\.com',

341

r'(?:www\.)?yt\.oelrichsgarcia\.de',

342

r'(?:www\.)?yt\.artemislena\.eu',

343

r'(?:www\.)?invidious\.flokinet\.to',

344

r'(?:www\.)?invidious\.baczek\.me',

345

r'(?:www\.)?y\.com\.sb',

346

r'(?:www\.)?invidious\.epicsite\.xyz',

347

r'(?:www\.)?invidious\.lidarshield\.cloud',

348

r'(?:www\.)?yt\.funami\.tech',

349

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

350

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

351

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

352

# youtube-dl invidious instances list

353

r'(?:(?:www|no)\.)?invidiou\.sh',

354

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

355

r'(?:www\.)?invidious\.kabi\.tk',

356

r'(?:www\.)?invidious\.mastodon\.host',

357

r'(?:www\.)?invidious\.zapashcanon\.fr',

358

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

359

r'(?:www\.)?invidious\.tinfoil-hat\.net',

360

r'(?:www\.)?invidious\.himiko\.cloud',

361

r'(?:www\.)?invidious\.reallyancient\.tech',

362

r'(?:www\.)?invidious\.tube',

363

r'(?:www\.)?invidiou\.site',

364

r'(?:www\.)?invidious\.site',

365

r'(?:www\.)?invidious\.xyz',

366

r'(?:www\.)?invidious\.nixnet\.xyz',

367

r'(?:www\.)?invidious\.048596\.xyz',

368

r'(?:www\.)?invidious\.drycat\.fr',

369

r'(?:www\.)?inv\.skyn3t\.in',

370

r'(?:www\.)?tube\.poal\.co',

371

r'(?:www\.)?tube\.connect\.cafe',

372

r'(?:www\.)?vid\.wxzm\.sx',

373

r'(?:www\.)?vid\.mint\.lgbt',

374

r'(?:www\.)?vid\.puffyan\.us',

375

r'(?:www\.)?yewtu\.be',

376

r'(?:www\.)?yt\.elukerio\.org',

377

r'(?:www\.)?yt\.lelux\.fi',

378

r'(?:www\.)?invidious\.ggc-project\.de',

379

r'(?:www\.)?yt\.maisputain\.ovh',

380

r'(?:www\.)?ytprivate\.com',

381

r'(?:www\.)?invidious\.13ad\.de',

382

r'(?:www\.)?invidious\.toot\.koeln',

383

r'(?:www\.)?invidious\.fdn\.fr',

384

r'(?:www\.)?watch\.nettohikari\.com',

385

r'(?:www\.)?invidious\.namazso\.eu',

386

r'(?:www\.)?invidious\.silkky\.cloud',

387

r'(?:www\.)?invidious\.exonip\.de',

388

r'(?:www\.)?invidious\.riverside\.rocks',

389

r'(?:www\.)?invidious\.blamefran\.net',

390

r'(?:www\.)?invidious\.moomoo\.de',

391

r'(?:www\.)?ytb\.trom\.tf',

392

r'(?:www\.)?yt\.cyberhost\.uk',

393

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

394

r'(?:www\.)?qklhadlycap4cnod\.onion',

395

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

396

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

397

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

398

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

399

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

400

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

401

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

402

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

403

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

404

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

405

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

406

r'(?:www\.)?piped\.kavin\.rocks',

407

r'(?:www\.)?piped\.tokhmi\.xyz',

408

r'(?:www\.)?piped\.syncpundit\.io',

409

r'(?:www\.)?piped\.mha\.fi',

410

r'(?:www\.)?watch\.whatever\.social',

411

r'(?:www\.)?piped\.garudalinux\.org',

412

r'(?:www\.)?piped\.rivo\.lol',

413

r'(?:www\.)?piped-libre\.kavin\.rocks',

414

r'(?:www\.)?yt\.jae\.fi',

415

r'(?:www\.)?piped\.mint\.lgbt',

416

r'(?:www\.)?il\.ax',

417

r'(?:www\.)?piped\.esmailelbob\.xyz',

418

r'(?:www\.)?piped\.projectsegfau\.lt',

419

r'(?:www\.)?piped\.privacydev\.net',

420

r'(?:www\.)?piped\.palveluntarjoaja\.eu',

421

r'(?:www\.)?piped\.smnz\.de',

422

r'(?:www\.)?piped\.adminforge\.de',

423

r'(?:www\.)?watch\.whatevertinfoil\.de',

424

r'(?:www\.)?piped\.qdi\.fi',

425

r'(?:www\.)?piped\.video',

426

r'(?:www\.)?piped\.aeong\.one',

427

r'(?:www\.)?piped\.moomoo\.me',

428

r'(?:www\.)?piped\.chauvet\.pro',

429

r'(?:www\.)?watch\.leptons\.xyz',

430

r'(?:www\.)?pd\.vern\.cc',

431

r'(?:www\.)?piped\.hostux\.net',

432

r'(?:www\.)?piped\.lunar\.icu',

433

# Hyperpipe instances from https://hyperpipe.codeberg.page/

434

r'(?:www\.)?hyperpipe\.surge\.sh',

435

r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',

436

r'(?:www\.)?listen\.whatever\.social',

437

r'(?:www\.)?music\.adminforge\.de',

438

)

439

440

# extracted from account/account_menu ep

441

# XXX: These are the supported YouTube UI and API languages,

442

# which is slightly different from languages supported for translation in YouTube studio

443

_SUPPORTED_LANG_CODES = [

444

'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',

445

'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',

446

'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',

447

'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',

448

'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',

449

'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'

450

]

451

452

_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}

453

454

@functools.cached_property
def _preferred_lang(self):
    """
    Language code requested through the "lang" extractor argument.

    Returns None when no language was requested.
    Raises ExtractorError when the requested code is not listed in
    _SUPPORTED_LANG_CODES (the comparison is case-sensitive), and reports a
    warning for every supported language other than 'en'.
    """
    lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not lang:
        return None

    if lang not in self._SUPPORTED_LANG_CODES:
        supported = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")
        raise ExtractorError(
            f'Unsupported language code: {lang}. Supported language codes (case-sensitive): {supported}.',
            expected=True)

    if lang != 'en':
        self.report_warning(
            f'Preferring "{lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return lang

471

472

def _initialize_consent(self):
    """
    Make sure an accepted CONSENT cookie exists for .youtube.com.

    Nothing is done when a '__Secure-3PSID' cookie is present or when the
    existing CONSENT cookie already contains 'YES'. Otherwise the id of a
    pending consent is reused when one can be found, falling back to a
    random three-digit id.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', f'YES+cb.20210328-17-p0.en+FX+{consent_id}')

486

487

def _initialize_pref(self):
    """
    Rewrite the PREF cookie so that it carries the preferred language and UTC.

    Values already stored in the user's PREF cookie are kept; only 'hl' and
    'tz' are overridden. A PREF cookie that cannot be parsed is reported as a
    warning and replaced.
    """
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if existing:
        try:
            settings = dict(urllib.parse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    settings['hl'] = self._preferred_lang or 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))

498

499

def _real_initialize(self):
    """Prepare the PREF and CONSENT cookies, then verify that login requirements are met."""
    self._initialize_pref()
    self._initialize_consent()
    # May raise via raise_login_required() when cookies are required but missing
    self._check_login_required()

503

504

def _check_login_required(self):
    """Raise a login-required error when this extractor needs login but no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')

507

508

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

509

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

510

511

def _get_default_ytcfg(self, client='web'):
    """
    Return a deep copy of the built-in INNERTUBE_CLIENTS entry for `client`.

    The copy keeps callers from mutating the shared module-level table.
    Raises KeyError when `client` is not a key of INNERTUBE_CLIENTS.
    """
    return copy.deepcopy(INNERTUBE_CLIENTS[client])

513

514

def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the built-in INNERTUBE_CLIENTS entry for `client`."""
    # NOTE(review): the client entries visible at the top of this file do not spell out
    # 'INNERTUBE_HOST'; presumably it is filled in elsewhere in the module - confirm.
    return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

516

517

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get on `ytcfg`, falling back to the default ytcfg of `default_client`.

    The fallback is used whenever the lookup on `ytcfg` yields a falsy value.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

521

522

def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from `ytcfg`, or from the default ytcfg of `default_client`."""
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, str, default_client)

526

527

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from `ytcfg`, or from the default ytcfg of `default_client`."""
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, str, default_client)

531

532

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname to talk to.

    Priority: the "innertube_host" extractor argument, then `req_api_hostname`,
    then the host of `default_client` ('web' when no client is given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

535

536

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, falling back to the default ytcfg of `default_client`."""
    return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)

538

539

def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict of `ytcfg` (or of the default ytcfg),
    with language and timezone of its 'client' section forced to the
    preferred language (default 'en') and UTC.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    innertube_context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction
    enforced = {'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0}
    client_section = traverse_obj(innertube_context, 'client', expected_type=dict, default={})
    client_section.update(enforced)
    return innertube_context

# Cached SAPISID cookie value: None = not looked up yet, False = looked up but not found
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the value of the "Authorization: SAPISIDHASH ..." header for `origin`.

    The SAPISID value is read from the youtube.com cookies on first use and
    cached on the instance in self._SAPISID.
    Returns None when no usable SAPISID cookie value is available.
    """
    time_now = round(time.time())
    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        sapisid_cookie = dict_get(
            yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if sapisid_cookie and sapisid_cookie.value:
            self._SAPISID = sapisid_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
        else:
            # Remember the failed lookup so the cookies are not searched again
            self._SAPISID = False
    if not self._SAPISID:
        return None
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    # The hash input is "<timestamp> <SAPISID> <origin>"
    sapisidhash = hashlib.sha1(
        f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
    return f'SAPISIDHASH {time_now}_{sapisidhash}'

573

574

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` to the /youtubei/v1/<ep> endpoint and return the decoded JSON.

    The request body is `query` merged over a 'context' entry (the given
    `context`, or the one extracted for `default_client`). `headers` override
    the generated API headers. The API key comes from the "innertube_key"
    extractor argument, then `api_key`, then the default for `default_client`.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    endpoint_url = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
    return self._download_json(
        endpoint_url, video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})

591

592

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON that follows the ytInitialData assignment (_YT_INITIAL_DATA_RE) and return it."""
    return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)

594

595

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    The arguments are searched in order for a 'SESSION_INDEX' entry that
    converts to an integer; None is returned when there is none.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    """
    candidates = (int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX'])) for cfg in data)
    return next((index for index in candidates if index is not None), None)

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):
    """
    Return the identity token, preferring the 'ID_TOKEN' entry of `ytcfg`
    and falling back to searching `webpage`. Returns None when not found.
    """
    cfg_token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if cfg_token:
        return cfg_token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

616

617

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated

        datasync_id = try_get(source, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
    return None

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)

645

646

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated; computed once and cached."""
    return bool(self._generate_sapisidhash_header())

649

650

def extract_ytcfg(self, video_id, webpage):
    """
    Extract the object passed to `ytcfg.set(...)` in `webpage`.

    Returns an empty dict when there is no webpage, when no `ytcfg.set({...});`
    call is found, or when its argument cannot be parsed as JSON.
    """
    if not webpage:
        return {}
    # The parentheses of the call are literal characters and must be escaped;
    # a bare `$` anchor in their place can never be followed by the `{...}` group.
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}

657

658

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an API request.

    Explicit keyword arguments take precedence over values found in `ytcfg`,
    which in turn fall back to the defaults of `default_client`. Headers whose
    value could not be determined are filtered out of the result.
    Authorization and X-Origin are added only when a SAPISIDHASH is available.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    client_version = self._extract_client_version(ytcfg, default_client)
    id_token = identity_token or self._extract_identity_token(ytcfg)
    page_id = account_syncid or self._extract_account_syncid(ytcfg)
    visitor_id = visitor_data or self._extract_visitor_data(ytcfg)
    user_agent = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': client_version,
        'Origin': origin,
        'X-Youtube-Identity-Token': id_token,
        'X-Goog-PageId': page_id,
        'X-Goog-Visitor-Id': visitor_id,
        'User-Agent': user_agent,
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # An account was requested but no session index is known: use the first one
        headers['X-Goog-AuthUser'] = 0

    sapisidhash = self._generate_sapisidhash_header(origin)
    if sapisidhash is not None:
        headers['Authorization'] = sapisidhash
        headers['X-Origin'] = origin
    return filter_dict(headers)

683

684

def _download_ytcfg(self, client, video_id):
    """
    Download the page that carries the ytcfg of `client` and extract it.

    Only 'web', 'web_music' and 'web_embedded' have a config page; an empty
    dict is returned for any other client or when nothing could be extracted.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    page_url = config_pages.get(client)
    if not page_url:
        return {}

    progress_note = f'Downloading {client.replace("_", " ").strip()} client config'
    webpage = self._download_webpage(page_url, video_id, fatal=False, note=progress_note)
    return self.extract_ytcfg(video_id, webpage) or {}

695

696

@staticmethod

697

def _build_api_continuation_query(continuation, ctp=None):

698

query = {

699

'continuation': continuation

700

}

701

# TODO: Inconsistency with clickTrackingParams.

702

# Currently we have a fixed ctp contained within context (from ytcfg)

703

# and a ctp in root query for continuation.

704

if ctp:

705

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's 'nextContinuationData'
    (or 'reloadContinuationData'). Returns None when no token is present.
    """
    data_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, data_getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

720

721

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None for non-dict input or when the endpoint has no
    'continuationCommand' token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

730

731

@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query of `renderer`.

    The next/reload continuation data is tried first; otherwise the first
    usable endpoint of a 'continuationItemRenderer' among the renderer's
    contents/items/rows is used.
    """
    continuation_item_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    )
    return (
        cls._extract_next_continuation_data(renderer)
        or traverse_obj(
            renderer, continuation_item_path,
            get_all=False, expected_type=cls._extract_continuation_ep_data))

741

742

@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every entry of data['alerts'] that has
    both a type and a non-empty text. Non-dict items of the list are skipped.
    """
    alert_dicts = try_get(data, lambda x: x['alerts'], list) or []
    for alert_dict in filter(lambda entry: isinstance(entry, dict), alert_dicts):
        for alert in alert_dict.values():
            alert_type = alert.get('type')
            message = cls._get_text(alert, 'text') if alert_type else None
            if message:
                yield alert_type, message

754

755

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (alert_type, message) pairs.

    With `fatal`, alerts of type "error" are collected and the last one is
    raised as ExtractorError; all earlier errors and all other alerts are
    reported as warnings. Messages listed in _IGNORED_WARNINGS are dropped.
    """
    errors, warnings = [], []
    for kind, message in alerts:
        is_fatal_error = kind.lower() == 'error' and fatal
        if is_fatal_error:
            errors.append((kind, message))
        elif message not in self._IGNORED_WARNINGS:
            warnings.append((kind, message))

    # Only the last error is raised; the earlier ones are downgraded to warnings
    for kind, message in [*warnings, *errors[:-1]]:
        self.report_warning(f'YouTube said: {kind} - {message}', only_once=only_once)
    if not errors:
        return
    raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

767

768

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and report them; extra arguments are forwarded to _report_alerts."""
    return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

770

771

def _extract_badges(self, renderer: dict):
    """
    Collect the badges of `renderer` as a list of {'type': BadgeType} dicts.

    Each 'metadataBadgeRenderer' is classified by its icon type, then by its
    style, and finally by matching known English words in its label.
    Badges that cannot be classified are left out.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer')):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Record the type found through the label (badge_type is falsy here)
                # and stop at the first match so a badge is only added once
                badges.append({'type': label_badge_type})
                break

    return badges

@staticmethod
def _has_badge(badges, badge_type):
    """Return True when any entry of `badges` has its 'type' equal to `badge_type`."""
    return bool(traverse_obj(badges, lambda _, v: v['type'] == badge_type))

814

815

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data` along any of `path_list`.

    A text object is read from its 'simpleText' string, or else from the
    joined 'text' of its 'runs' (a plain list is treated as the runs
    themselves). With no paths, `data` itself is the text object.
    @param max_runs: when set, only this many leading runs are joined
    Returns None when no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            obj = [data]
        else:
            obj = traverse_obj(data, path, default=[])
            # A path without branching (no `...` and no nested list/tuple key) yields
            # a single object rather than a list of results, so wrap it for the loop below
            if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                obj = [obj]
        for item in obj:
            text = try_get(item, lambda x: x['simpleText'], str)
            if text:
                return text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            # `max_runs or len(runs)` makes both None and 0 mean "no limit"
            runs = runs[:min(len(runs), max_runs or len(runs))]
            text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str))
            if text:
                return text

def _get_count(self, data, *path_list):
    """
    Parse a count from the text found in `data` along `path_list`.

    When parse_count cannot handle the text, the leading run of digits and
    commas (after removing all whitespace) is converted instead.
    """
    text = self._get_text(data, *path_list) or ''
    parsed = parse_count(text)
    if parsed is not None:
        return parsed
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', text), 'count', default=None)
    return str_to_int(leading_digits)

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    Entries without a valid URL are skipped.
    """
    results = []
    for path in path_list or [()]:
        for entry in traverse_obj(data, (*variadic(path), 'thumbnails', ...)):
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.partition('?')[0]
            results.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text contains no relative time or when the
    matched time cannot be converted.
    """
    # Either a keyword ("today", "yesterday", "now") or "<number> <unit>[s] ago"
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None

    start = mobj.group('start')
    if start:
        return datetime_from_str(start)
    try:
        return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
    except ValueError:
        return None

def _parse_time_text(self, text):
    """
    Convert a time text (relative such as "6 days ago", or a date) to a UNIX timestamp.

    Returns None for empty text or when the text cannot be parsed; in the
    latter case a warning is reported if the preferred language is unset or 'en'.
    """
    if not text:
        return None

    relative = self.extract_relative_time(text)
    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        timestamp = unified_timestamp(text)
        if not timestamp:
            # Retry with only the date-looking part of the text
            date_text = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_text)

    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp

904

905

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` with retries and return the JSON response.

    An attempt is retried on network errors other than HTTP 403/429, on an
    "unknown error" alert inside the response, and when the response has no
    data at `check_get_keys`. Any other failure is passed to
    _error_or_warning() together with `fatal`.
    """
    for retry in self.RetryManager():
        try:
            # Always fatal here: failures are handled (and possibly retried) below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                retry.error = e
                continue

            # Peek at the error body; a non-HTML body may be a JSON error from YouTube
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response

@staticmethod

def is_music_url(url):

959

return re.match(r'https?://music\.youtube\.com/', url) is not None

960

961

def _extract_video(self, renderer):
    """
    Build a 'url'-type result dict for a video from a video renderer.

    Besides the URL (a /shorts/ URL for shorts, a /watch URL otherwise) the
    result carries the metadata readable from the renderer: title,
    description, duration, channel, thumbnails, availability, live status
    and view count.
    """
    video_id = renderer.get('videoId')

    # Present for reel (shorts) items; used as a fallback source for several fields
    reel_header_renderer = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header_renderer, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text/overlay, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
            video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not channel_id:
        channel_id = traverse_obj(reel_header_renderer, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    url = f'https://www.youtube.com/watch?v={video_id}'
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header_renderer, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    # A scheduled start time takes precedence over the "streamed" text and the live overlay/badge
    live_status = (
        'is_upcoming' if scheduled_timestamp is not None
        else 'was_live' if 'streamed' in time_text.lower()
        else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
        else None)

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    view_count = (0 if 'no views' in view_count_text.lower()
                  else self._get_count({'simpleText': view_count_text}))
    # For live and upcoming videos the count is stored as concurrent viewers instead
    view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': (self._get_text(renderer, 'ownerText', 'shortBylineText')
                    or self._get_text(reel_header_renderer, 'channelTitleText')),
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        # The timestamp is only filled in when the "approximate_date" extractor argument is set
        # NOTE(review): ie_key is passed the YoutubeTabIE class here, while other calls
        # pass YoutubeIE.ie_key() - confirm this is what _configuration_arg expects
        'timestamp': (self._parse_time_text(time_text)
                      if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
                      else None),
        'release_timestamp': scheduled_timestamp,
        'availability':
            'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            else self._availability(
                is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
                needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
                needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
                is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
        view_count_field: view_count,
        'live_status': live_status
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

1046

IE_DESC = 'YouTube'

1047

# Verbose regex accepting either a bare 11-character video ID or a URL on one
# of the listed hostnames that carries the ID.  Group 1 is the optional URL
# prefix and the named group `id` is the video ID; `%(invidious)s` is filled
# with the alternation built from YoutubeBaseInfoExtractor._INVIDIOUS_SITES.
#
# The ID-prefix group needs a path alternative (v/, embed/, e/, shorts/) next
# to the `v=` query form: with that alternative left empty, URLs such as
# https://www.youtube.com/embed/CsmdDsKjzN8 (listed in _TESTS) cannot match.
# The negative lookahead rejects `videoseries` and `live_stream`, which are
# exactly 11 characters long and would otherwise be captured as a video ID.
#
# `(?(1).+)?` lets arbitrary text follow the ID only when a URL prefix
# (group 1) matched; a naked ID must be followed by `#` or end of input.
_VALID_URL = r"""(?x)^
    (
        (?:https?://|//)                                    # http(s):// or protocol-independent URL
        (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
           (?:www\.)?deturl\.com/www\.youtube\.com|
           (?:www\.)?pwnyoutube\.com|
           (?:www\.)?hooktube\.com|
           (?:www\.)?yourepeat\.com|
           tube\.majestyc\.net|
           %(invidious)s|
           youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
        (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
        (?:                                                  # the various things that can precede the ID:
            (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream))  # v/ or embed/ or e/ or shorts/
            |(?:                                             # or the v= param in all its forms
                (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
                v=
            )
        ))
        |(?:
           youtu\.be|                                        # just youtu.be/xxxx
           vid\.plus|                                        # or vid.plus/xxxx
           zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
           %(invidious)s
        )/
        |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
        )
    )?                                                       # all until now is optional -> you can pass the naked ID
    (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
    (?(1).+)?                                                # if we found the ID, everything can follow
    (?:\#|$)""" % {
    'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}

# Patterns that locate YouTube player URLs in HTML/JS markup.  Each pattern
# exposes the matched URL through the named group `url`.
_EMBED_REGEX = [
    # Generic embeds: an opening construct (iframe/embed/object tag, a
    # data-video-url attribute, or an SWFObject call), then a quote character
    # (group 1), the youtube(-nocookie).com /embed/, /v/ or /p/ URL, and the
    # same quote character again.
    r'''(?x)
        (?:
            <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''',
    # Why `embedSWF\(\s*`: the previous `embedSWF\(?:\s*` was a mangled
    # non-capturing group that required a literal ':' after an optional '(',
    # so ordinary `embedSWF("...")` calls could never match.

    # https://wordpress.org/plugins/lazy-load-for-videos/
    r'''(?xs)
        <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
        \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
]

1101

_RETURN_TYPE = 'video'  # XXX: How to handle multifeed?

# Regexes tried against a player JavaScript URL; each captures the player
# identifier in the named group `id`.
_PLAYER_INFO_RE = (
    # .../s/player/<id>/player...   (<id> is at least 8 characters)
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    # .../<id>/player_ias.vflset[/<locale>]/base.js  or
    # .../<id>/player-plasma-ias-(phone|tablet)-<xx>_<XX>.vflset/base.js
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    # any path ending in .js that contains a `vfl...` token
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)

1108

# Static per-itag metadata: maps a format ID string to the fields known for it
# (container/extension, dimensions, codecs, bitrate, notes, preference).
# NOTE(review): how this table is merged with data from the API is not visible
# in this part of the file — confirm at the use site.
_formats = {
    # Legacy muxed (audio + video) formats
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

    # 3D videos (preference -20)
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming (preference -10)
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # DASH mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # DASH webm video
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # DASH webm audio (vorbis)
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # DASH webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
    '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
    '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
    '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
    '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}

1217

# Subtitle format identifiers known to this extractor.
# NOTE(review): presumably the formats requested for each caption track —
# confirm at the use site.
_SUBTITLE_FORMATS = (
    'json3',
    'srv1',
    'srv2',
    'srv3',
    'ttml',
    'vtt',
)

# NOTE(review): presumably opts this extractor out of the base class's
# geo-bypass handling — confirm against InfoExtractor.
_GEO_BYPASS = False

# Name under which this extractor is registered.
IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1229

'uploader': 'Philipp Hagemeister',

1230

'uploader_id': 'phihag',

1231

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1232

'channel': 'Philipp Hagemeister',

1233

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1234

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1235

'upload_date': '20121002',

1236

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1237

'categories': ['Science & Technology'],

1238

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1243

'playable_in_embed': True,

1244

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1245

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1250

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1255

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1260

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1261

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1262

'uploader': 'SET India',

1263

'uploader_id': 'setindia',

1264

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1265

'age_limit': 18,

1266

},

1267

'skip': 'Private video',

1268

},

1269

{

1270

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1271

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1276

'uploader': 'Philipp Hagemeister',

1277

'uploader_id': 'phihag',

1278

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1279

'channel': 'Philipp Hagemeister',

1280

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1281

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1282

'upload_date': '20121002',

1283

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1284

'categories': ['Science & Technology'],

1285

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1290

'playable_in_embed': True,

1291

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1292

'live_status': 'not_live',

1293

'age_limit': 0,

1294

'comment_count': int,

1295

'channel_follower_count': int

1296

},

1297

'params': {

1298

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1303

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1308

'uploader_id': '8KVIDEO',

1309

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1310

'description': '',

1311

'uploader': '8KVIDEO',

1312

'title': 'UHDTV TEST 8K VIDEO.mp4'

1313

},

1314

'params': {

1315

'youtube_include_dash_manifest': True,

1316

'format': '141',

1317

},

1318

'skip': 'format 141 not served anymore',

1319

},

1320

# DASH manifest with encrypted signature

1321

{

1322

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1327

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1328

'duration': 244,

1329

'uploader': 'AfrojackVEVO',

1330

'uploader_id': 'AfrojackVEVO',

1331

'upload_date': '20131011',

1332

'abr': 129.495,

1333

'like_count': int,

1334

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1335

'playable_in_embed': True,

1336

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1337

'view_count': int,

1338

'track': 'The Spark',

1339

'live_status': 'not_live',

1340

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1341

'channel': 'Afrojack',

1342

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1343

'tags': 'count:19',

1344

'availability': 'public',

1345

'categories': ['Music'],

1346

'age_limit': 0,

1347

'alt_title': 'The Spark',

1348

'channel_follower_count': int

1349

},

1350

'params': {

1351

'youtube_include_dash_manifest': True,

1352

'format': '141/bestaudio[ext=m4a]',

1353

},

1354

},

1355

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1356

{

1357

'note': 'Embed allowed age-gate video',

1358

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1363

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1364

'duration': 142,

1365

'uploader': 'The Witcher',

1366

'uploader_id': 'WitcherGame',

1367

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1368

'upload_date': '20140605',

1369

'age_limit': 18,

1370

'categories': ['Gaming'],

1371

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1372

'availability': 'needs_auth',

1373

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1374

'like_count': int,

1375

'channel': 'The Witcher',

1376

'live_status': 'not_live',

1377

'tags': 'count:17',

1378

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1379

'playable_in_embed': True,

1380

'view_count': int,

1381

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1386

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1391

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1392

'upload_date': '20200408',

1393

'uploader_id': 'FlyingKitty900',

1394

'uploader': 'FlyingKitty',

1395

'age_limit': 18,

1396

'availability': 'needs_auth',

1397

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1398

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1399

'channel': 'FlyingKitty',

1400

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1401

'view_count': int,

1402

'categories': ['Entertainment'],

1403

'live_status': 'not_live',

1404

'tags': ['Flyingkitty', 'godzilla 2'],

1405

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1406

'like_count': int,

1407

'duration': 177,

1408

'playable_in_embed': True,

1409

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1414

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1415

'info_dict': {

1416

'id': 'Tq92D6wQ1mg',

1417

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1418

'ext': 'mp4',

1419

'upload_date': '20191228',

1420

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1421

'uploader': 'Projekt Melody',

1422

'description': 'md5:17eccca93a786d51bc67646756894066',

1423

'age_limit': 18,

1424

'like_count': int,

1425

'availability': 'needs_auth',

1426

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1427

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1428

'view_count': int,

1429

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1430

'channel': 'Projekt Melody',

1431

'live_status': 'not_live',

1432

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1433

'playable_in_embed': True,

1434

'categories': ['Entertainment'],

1435

'duration': 106,

1436

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1437

'comment_count': int,

1438

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1443

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1448

'uploader': 'Herr Lurik',

1449

'uploader_id': 'st3in234',

1450

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1451

'upload_date': '20130730',

1452

'track': 'Such mich find mich',

1453

'age_limit': 0,

1454

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1455

'like_count': int,

1456

'playable_in_embed': False,

1457

'creator': 'OOMPH!',

1458

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1459

'view_count': int,

1460

'alt_title': 'Such mich find mich',

1461

'duration': 210,

1462

'channel': 'Herr Lurik',

1463

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1464

'categories': ['Music'],

1465

'availability': 'public',

1466

'uploader_url': 'http://www.youtube.com/user/st3in234',

1467

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1468

'live_status': 'not_live',

1469

'artist': 'OOMPH!',

1470

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1475

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1476

'only_matching': True,

1477

},

1478

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1479

# YouTube Red ad is not captured for creator

1480

{

1481

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1487

'uploader_id': 'deadmau5',

1488

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1489

'creator': 'deadmau5',

1490

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1491

'uploader': 'deadmau5',

1492

'title': 'Deadmau5 - Some Chords (HD)',

1493

'alt_title': 'Some Chords',

1494

'availability': 'public',

1495

'tags': 'count:14',

1496

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1497

'view_count': int,

1498

'live_status': 'not_live',

1499

'channel': 'deadmau5',

1500

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1501

'like_count': int,

1502

'track': 'Some Chords',

1503

'artist': 'deadmau5',

1504

'playable_in_embed': True,

1505

'age_limit': 0,

1506

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1507

'categories': ['Music'],

1508

'album': 'Some Chords',

1509

'channel_follower_count': int

1510

},

1511

'expected_warnings': [

1512

'DASH manifest missing',

1513

]

1514

},

1515

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1516

{

1517

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1523

'uploader_id': 'olympic',

1524

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1525

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1526

'uploader': 'Olympics',

1527

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1528

'like_count': int,

1529

'release_timestamp': 1343767800,

1530

'playable_in_embed': True,

1531

'categories': ['Sports'],

1532

'release_date': '20120731',

1533

'channel': 'Olympics',

1534

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1535

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1536

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1537

'age_limit': 0,

1538

'availability': 'public',

1539

'live_status': 'was_live',

1540

'view_count': int,

1541

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1542

'channel_follower_count': int

1543

},

1544

'params': {

1545

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1555

'duration': 85,

1556

'upload_date': '20110310',

1557

'uploader_id': 'AllenMeow',

1558

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1559

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1560

'uploader': '孫ᄋᄅ',

1561

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1562

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1567

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1568

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1569

'view_count': int,

1570

'categories': ['People & Blogs'],

1571

'like_count': int,

1572

'live_status': 'not_live',

1573

'availability': 'unlisted',

1574

'comment_count': int,

1575

'channel_follower_count': int

1576

},

1577

},

1578

# url_encoded_fmt_stream_map is empty string

1579

{

1580

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1585

'description': '',

1586

'upload_date': '20150404',

1587

'uploader_id': 'spbelect',

1588

'uploader': 'Наблюдатели Петербурга',

1589

},

1590

'params': {

1591

'skip_download': 'requires avconv',

1592

},

1593

'skip': 'This live event has ended.',

1594

},

1595

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1596

{

1597

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1602

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1603

'duration': 220,

1604

'upload_date': '20150625',

1605

'uploader_id': 'dorappi2000',

1606

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1607

'uploader': 'dorappi2000',

1608

'formats': 'mincount:31',

1609

},

1610

'skip': 'not actual anymore',

1611

},

1612

# DASH manifest with segment_list

1613

{

1614

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1615

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1620

'uploader': 'Airtek',

1621

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1622

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1623

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1624

},

1625

'params': {

1626

'youtube_include_dash_manifest': True,

1627

'format': '135', # bestvideo

1628

},

1629

'skip': 'This live event has ended.',

1630

},

1631

{

1632

# Multifeed videos (multiple cameras), URL can be of any Camera

1633

'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',

1634

'info_dict': {

1635

'id': 'zaPI8MvL8pg',

1636

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',

1637

'description': 'md5:563ccbc698b39298481ca3c571169519',

},

'playlist': [{

'info_dict': {

'id': 'j5yGuxZ8lLU',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',

1644

'uploader': 'WiiLikeToPlay',

1645

'description': 'md5:563ccbc698b39298481ca3c571169519',

1646

'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',

1647

'duration': 10120,

1648

'channel_follower_count': int,

1649

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1650

'availability': 'public',

1651

'playable_in_embed': True,

1652

'upload_date': '20131105',

1653

'uploader_id': 'WiiRikeToPray',

1654

'categories': ['Gaming'],

1655

'live_status': 'was_live',

1656

'tags': 'count:24',

1657

'release_timestamp': 1383701910,

1658

'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',

1659

'comment_count': int,

1660

'age_limit': 0,

1661

'like_count': int,

1662

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1663

'channel': 'WiiLikeToPlay',

1664

'view_count': int,

1665

'release_date': '20131106',

},

}, {

'info_dict': {

'id': 'zaPI8MvL8pg',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',

1672

'uploader_id': 'WiiRikeToPray',

1673

'availability': 'public',

1674

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1675

'channel': 'WiiLikeToPlay',

1676

'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',

1677

'channel_follower_count': int,

1678

'description': 'md5:563ccbc698b39298481ca3c571169519',

'duration': 10108,

'age_limit': 0,

'like_count': int,

'tags': 'count:24',

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1684

'uploader': 'WiiLikeToPlay',

1685

'release_timestamp': 1383701915,

1686

'comment_count': int,

1687

'upload_date': '20131105',

1688

'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',

1689

'release_date': '20131106',

1690

'playable_in_embed': True,

1691

'live_status': 'was_live',

1692

'categories': ['Gaming'],

'view_count': int,

},

}, {

'info_dict': {

'id': 'R7r3vfO7Hao',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',

1700

'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',

1701

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1702

'like_count': int,

1703

'availability': 'public',

1704

'playable_in_embed': True,

1705

'upload_date': '20131105',

1706

'description': 'md5:563ccbc698b39298481ca3c571169519',

1707

'uploader_id': 'WiiRikeToPray',

1708

'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',

1709

'channel_follower_count': int,

1710

'tags': 'count:24',

1711

'release_date': '20131106',

1712

'uploader': 'WiiLikeToPlay',

1713

'comment_count': int,

1714

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1715

'channel': 'WiiLikeToPlay',

1716

'categories': ['Gaming'],

1717

'release_timestamp': 1383701914,

1718

'live_status': 'was_live',

'age_limit': 0,

'duration': 10128,

'view_count': int,

},

}],

'params': {'skip_download': True},

1725

},

1726

{

1727

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1728

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1729

'info_dict': {

1730

'id': 'gVfLd0zydlo',

1731

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1732

},

1733

'playlist_count': 2,

1734

'skip': 'Not multifeed anymore',

1735

},

1736

{

1737

'url': 'https://vid.plus/FlRa-iH7PGw',

1738

'only_matching': True,

1739

},

1740

{

1741

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1742

'only_matching': True,

1743

},

1744

{

1745

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1746

# Also tests cut-off URL expansion in video description (see

1747

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1748

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1749

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1754

'alt_title': 'Dark Walk',

1755

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1756

'duration': 133,

1757

'upload_date': '20151119',

1758

'uploader_id': 'IronSoulElf',

1759

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1760

'uploader': 'IronSoulElf',

1761

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1762

'track': 'Dark Walk',

1763

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1764

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1765

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1766

'categories': ['Film & Animation'],

1767

'view_count': int,

1768

'live_status': 'not_live',

1769

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1770

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1771

'tags': 'count:13',

1772

'availability': 'public',

1773

'channel': 'IronSoulElf',

1774

'playable_in_embed': True,

1775

'like_count': int,

1776

'age_limit': 0,

1777

'channel_follower_count': int

1778

},

1779

'params': {

1780

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1785

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1786

'only_matching': True,

1787

},

1788

{

1789

# Video with yt:stretch=17:0

1790

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1795

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1796

'upload_date': '20151107',

1797

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1798

'uploader': 'CH GAMER DROID',

1799

},

1800

'params': {

1801

'skip_download': True,

1802

},

1803

'skip': 'This video does not exist.',

1804

},

1805

{

1806

# Video with incomplete 'yt:stretch=16:'

1807

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1808

'only_matching': True,

1809

},

1810

{

1811

# Video licensed under Creative Commons

1812

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1817

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1818

'duration': 721,

1819

'upload_date': '20150128',

1820

'uploader_id': 'BerkmanCenter',

1821

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1822

'uploader': 'The Berkman Klein Center for Internet & Society',

1823

'license': 'Creative Commons Attribution license (reuse allowed)',

1824

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1825

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1826

'like_count': int,

1827

'age_limit': 0,

1828

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1829

'channel': 'The Berkman Klein Center for Internet & Society',

1830

'availability': 'public',

1831

'view_count': int,

1832

'categories': ['Education'],

1833

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1834

'live_status': 'not_live',

1835

'playable_in_embed': True,

1836

'comment_count': int,

1837

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

# Channel-like uploader_url

1846

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1851

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1852

'duration': 4060,

1853

'upload_date': '20151120',

1854

'uploader': 'Bernie Sanders',

1855

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1856

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1857

'license': 'Creative Commons Attribution license (reuse allowed)',

1858

'playable_in_embed': True,

1859

'tags': 'count:12',

1860

'like_count': int,

1861

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1862

'age_limit': 0,

1863

'availability': 'public',

1864

'categories': ['News & Politics'],

1865

'channel': 'Bernie Sanders',

1866

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1867

'view_count': int,

1868

'live_status': 'not_live',

1869

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1870

'comment_count': int,

1871

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1880

'only_matching': True,

1881

},

1882

{

1883

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1884

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1885

'only_matching': True,

1886

},

1887

{

1888

# Rental video preview

1889

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1894

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1895

'upload_date': '20150811',

1896

'uploader': 'FlixMatrix',

1897

'uploader_id': 'FlixMatrixKaravan',

1898

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1899

'license': 'Standard YouTube License',

1900

},

1901

'params': {

1902

'skip_download': True,

1903

},

1904

'skip': 'This video is not available.',

1905

},

1906

{

1907

# YouTube Red video with episode data

1908

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1913

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1914

'duration': 2085,

1915

'upload_date': '20170118',

1916

'uploader': 'Vsauce',

1917

'uploader_id': 'Vsauce',

1918

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1919

'series': 'Mind Field',

1920

'season_number': 1,

1921

'episode_number': 1,

1922

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1923

'tags': 'count:12',

1924

'view_count': int,

1925

'availability': 'public',

1926

'age_limit': 0,

1927

'channel': 'Vsauce',

1928

'episode': 'Episode 1',

1929

'categories': ['Entertainment'],

1930

'season': 'Season 1',

1931

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1932

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1933

'like_count': int,

1934

'playable_in_embed': True,

1935

'live_status': 'not_live',

1936

'channel_follower_count': int

1937

},

1938

'params': {

1939

'skip_download': True,

1940

},

1941

'expected_warnings': [

1942

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1947

# as inappropriate or offensive to some audiences.

1948

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1953

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1954

'duration': 965,

1955

'upload_date': '20140124',

1956

'uploader': 'New Century Foundation',

1957

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1958

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1959

},

1960

'params': {

1961

'skip_download': True,

1962

},

1963

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1968

'only_matching': True,

1969

},

1970

{

1971

# geo restricted to JP

1972

'url': 'sJL6WA-aGkQ',

1973

'only_matching': True,

1974

},

1975

{

1976

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1977

'only_matching': True,

1978

},

1979

{

1980

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1981

'only_matching': True,

1982

},

1983

{

1984

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1985

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1986

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1991

'only_matching': True,

1992

},

1993

{

1994

# Video with unsupported adaptive stream type formats

1995

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

2000

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

2001

'duration': 433,

2002

'upload_date': '20130923',

2003

'uploader': 'Amelia Putri Harwita',

2004

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

2005

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

2006

'formats': 'maxcount:10',

2007

},

2008

'params': {

2009

'skip_download': True,

2010

'youtube_include_dash_manifest': False,

2011

},

2012

'skip': 'not actual anymore',

2013

},

2014

{

2015

# YouTube Music auto-generated description

2016

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

2021

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

2022

'upload_date': '20190312',

2023

'uploader': 'Stephen - Topic',

2024

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

2025

'artist': 'Stephen',

2026

'track': 'Voyeur Girl',

2027

'album': 'it\'s too much love to know my dear',

2028

'release_date': '20190313',

2029

'release_year': 2019,

2030

'alt_title': 'Voyeur Girl',

2031

'view_count': int,

2032

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

2033

'playable_in_embed': True,

2034

'like_count': int,

2035

'categories': ['Music'],

2036

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

2037

'channel': 'Stephen',

2038

'availability': 'public',

2039

'creator': 'Stephen',

2040

'duration': 169,

2041

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

2042

'age_limit': 0,

2043

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

2044

'tags': 'count:11',

2045

'live_status': 'not_live',

2046

'channel_follower_count': int

2047

},

2048

'params': {

2049

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

2054

'only_matching': True,

2055

},

2056

{

2057

# invalid -> valid video id redirection

2058

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

2063

'description': 'md5:bf577a41da97918e94fa9798d9228825',

2064

'upload_date': '20090125',

2065

'uploader': 'Prochorowka',

2066

'uploader_id': 'Prochorowka',

2067

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

2068

'artist': 'Panjabi MC',

2069

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

2070

'album': 'Beware of the Boys (Mundian To Bach Ke)',

2071

},

2072

'params': {

2073

'skip_download': True,

2074

},

2075

'skip': 'Video unavailable',

2076

},

2077

{

2078

# empty description results in an empty string

2079

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

2086

'uploader_id': 'ElevageOrVert',

2087

'uploader': 'ElevageOrVert',

2088

'view_count': int,

2089

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

2090

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

2091

'like_count': int,

2092

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

2093

'tags': [],

2094

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

2095

'availability': 'public',

2096

'age_limit': 0,

2097

'categories': ['Pets & Animals'],

2098

'duration': 7,

2099

'playable_in_embed': True,

2100

'live_status': 'not_live',

2101

'channel': 'ElevageOrVert',

2102

'channel_follower_count': int

2103

},

2104

'params': {

2105

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

2110

# see [2] for an example with '};' inside ytInitialPlayerResponse

2111

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

2112

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

2113

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

2118

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

2119

'upload_date': '20130831',

2120

'uploader_id': 'kudvenkat',

2121

'uploader': 'kudvenkat',

2122

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

2123

'like_count': int,

2124

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

2125

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

2126

'live_status': 'not_live',

2127

'categories': ['Education'],

2128

'availability': 'public',

2129

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

2130

'tags': 'count:12',

2131

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

2136

'comment_count': int,

2137

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

2146

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

2147

'only_matching': True,

2148

},

2149

{

2150

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

2151

'only_matching': True,

2152

},

2153

{

2154

# https://github.com/ytdl-org/youtube-dl/pull/28094

2155

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

2161

'upload_date': '20141120',

2162

'uploader': 'The Cinematic Orchestra - Topic',

2163

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2164

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2165

'artist': 'The Cinematic Orchestra',

2166

'track': 'Burn Out',

2167

'album': 'Every Day',

2168

'like_count': int,

2169

'live_status': 'not_live',

2170

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2175

'creator': 'The Cinematic Orchestra',

2176

'channel': 'The Cinematic Orchestra',

2177

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

2178

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2179

'availability': 'public',

2180

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

2181

'categories': ['Music'],

2182

'playable_in_embed': True,

2183

'channel_follower_count': int

2184

},

2185

'params': {

2186

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

2191

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

2192

'only_matching': True,

2193

},

2194

{

2195

# controversial video, requires bpctr/contentCheckOk

2196

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

2201

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

2202

'uploader': 'CBS Mornings',

2203

'uploader_id': 'CBSThisMorning',

2204

'upload_date': '20140716',

2205

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

2206

'duration': 170,

2207

'categories': ['News & Politics'],

2208

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2209

'view_count': int,

2210

'channel': 'CBS Mornings',

2211

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2212

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2213

'age_limit': 18,

2214

'availability': 'needs_auth',

2215

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2216

'like_count': int,

2217

'live_status': 'not_live',

2218

'playable_in_embed': True,

2219

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2224

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2229

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2230

'upload_date': '20201120',

2231

'uploader': 'Walk around Japan',

2232

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2233

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2234

'duration': 1456,

2235

'categories': ['Travel & Events'],

2236

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2237

'view_count': int,

2238

'channel': 'Walk around Japan',

2239

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2240

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2241

'age_limit': 0,

2242

'availability': 'public',

2243

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2244

'live_status': 'not_live',

2245

'playable_in_embed': True,

2246

'channel_follower_count': int

2247

},

2248

'params': {

2249

'skip_download': True,

2250

},

2251

}, {

2252

# Has multiple audio streams

2253

'url': 'WaOKSUlf4TM',

2254

'only_matching': True

2255

}, {

2256

# Requires Premium: has format 141 when requested using YTM url

2257

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2258

'only_matching': True

2259

}, {

2260

# multiple subtitles with same lang_code

2261

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2262

'only_matching': True,

2263

}, {

2264

# Force use android client fallback

2265

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2266

'info_dict': {

2267

'id': 'YOelRv7fMxY',

2268

'title': 'DIGGING A SECRET TUNNEL Part 1',

2269

'ext': '3gp',

2270

'upload_date': '20210624',

2271

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2272

'uploader': 'colinfurze',

2273

'uploader_id': 'colinfurze',

2274

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2275

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2276

'duration': 596,

2277

'categories': ['Entertainment'],

2278

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2279

'view_count': int,

2280

'channel': 'colinfurze',

2281

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2282

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2283

'age_limit': 0,

2284

'availability': 'public',

2285

'like_count': int,

2286

'live_status': 'not_live',

2287

'playable_in_embed': True,

2288

'channel_follower_count': int,

'chapters': list,

},

'params': {

'format': '17', # 3gp format available on android

2293

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2298

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2299

'only_matching': True,

2300

'params': {

2301

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2306

'only_matching': True,

2307

}, {

2308

'note': 'Storyboards',

2309

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2315

'uploader_id': 'scishow',

2316

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2317

'upload_date': '20140324',

2318

'uploader': 'SciShow',

2319

'like_count': int,

2320

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2321

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2322

'view_count': int,

2323

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2324

'playable_in_embed': True,

2325

'tags': 'count:12',

2326

'uploader_url': 'http://www.youtube.com/user/scishow',

2327

'availability': 'public',

2328

'channel': 'SciShow',

2329

'live_status': 'not_live',

2330

'duration': 248,

2331

'categories': ['Education'],

2332

'age_limit': 0,

2333

'channel_follower_count': int,

2334

'chapters': list,

2335

}, 'params': {'format': 'mhtml', 'skip_download': True}

2336

}, {

2337

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2338

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2343

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2344

'uploader': 'Leon Nguyen',

2345

'uploader_id': 'VNSXIII',

2346

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2347

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2348

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2353

'tags': 'count:23',

2354

'playable_in_embed': True,

2355

'live_status': 'not_live',

2356

'upload_date': '20220103',

2357

'like_count': int,

2358

'availability': 'public',

2359

'channel': 'Leon Nguyen',

2360

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2361

'comment_count': int,

2362

'channel_follower_count': int

2363

}

2364

}, {

2365

# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date

2366

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2371

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2372

'uploader': 'Leon Nguyen',

2373

'uploader_id': 'VNSXIII',

2374

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2375

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2376

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2381

'tags': 'count:23',

2382

'playable_in_embed': True,

2383

'live_status': 'not_live',

2384

'upload_date': '20220102',

2385

'like_count': int,

2386

'availability': 'public',

2387

'channel': 'Leon Nguyen',

2388

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2389

'comment_count': int,

2390

'channel_follower_count': int

2391

},

2392

'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}

2393

}, {

2394

# Date text is for a premiered video; ensure upload date is in UTC (published 1641172509)

2395

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2400

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2401

'uploader': 'Quackity',

2402

'uploader_id': 'QuackityHQ',

2403

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2404

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2405

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2410

'tags': 'count:26',

2411

'playable_in_embed': True,

2412

'live_status': 'not_live',

2413

'release_timestamp': 1641172509,

2414

'release_date': '20220103',

2415

'upload_date': '20220103',

2416

'like_count': int,

2417

'availability': 'public',

2418

'channel': 'Quackity',

2419

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2420

'channel_follower_count': int

2421

}

2422

},

2423

{ # continuous livestream. Microformat upload date should be preferred.

2424

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2425

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2426

'info_dict': {

2427

'id': 'kgx4WGK0oNU',

2428

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2429

'ext': 'mp4',

2430

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2431

'availability': 'public',

2432

'age_limit': 0,

2433

'release_timestamp': 1637975704,

2434

'upload_date': '20210619',

2435

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2436

'live_status': 'is_live',

2437

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2438

'uploader': '阿鲍Abao',

2439

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2440

'channel': 'Abao in Tokyo',

2441

'channel_follower_count': int,

2442

'release_date': '20211127',

2443

'tags': 'count:39',

2444

'categories': ['People & Blogs'],

2445

'like_count': int,

2446

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2447

'view_count': int,

2448

'playable_in_embed': True,

2449

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2450

'concurrent_view_count': int,

2451

},

2452

'params': {'skip_download': True}

2453

}, {

2454

# Story. Requires specific player params to work.

2455

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2460

'view_count': int,

2461

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2462

'upload_date': '20220526',

2463

'categories': ['Education'],

2464

'title': 'Story',

2465

'channel': 'IT\'S HISTORY',

2466

'description': '',

2467

'uploader_id': 'BlastfromthePast',

2468

'duration': 12,

2469

'uploader': 'IT\'S HISTORY',

2470

'playable_in_embed': True,

2471

'age_limit': 0,

2472

'live_status': 'not_live',

2473

'tags': [],

2474

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2475

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2476

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2477

},

2478

'skip': 'stories get removed after some period of time',

2479

}, {

2480

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2485

'upload_date': '20220323',

2486

'like_count': int,

2487

'availability': 'unlisted',

2488

'channel': 'nao20010128nao',

2489

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2490

'age_limit': 0,

2491

'uploader': 'nao20010128nao',

2492

'uploader_id': 'nao20010128nao',

2493

'categories': ['Music'],

2494

'view_count': int,

2495

'description': '',

2496

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2497

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2498

'live_status': 'not_live',

2499

'playable_in_embed': True,

2500

'channel_follower_count': int,

2501

'duration': 6,

2502

'tags': [],

2503

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

2504

}

2505

}, {

2506

# Prefer primary title+description language metadata by default

2507

# Do not prefer translated description if primary is empty

2508

'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',

'info_dict': {

'id': 'el3E4MbxRqQ',

'ext': 'mp4',

'title': 'dlp test video 2 - primary sv no desc',

2513

'description': '',

2514

'channel': 'cole-dlp-test-acc',

2515

'tags': [],

2516

'view_count': int,

2517

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2518

'like_count': int,

2519

'playable_in_embed': True,

2520

'availability': 'unlisted',

2521

'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',

2522

'age_limit': 0,

2523

'duration': 5,

2524

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2525

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2526

'live_status': 'not_live',

2527

'upload_date': '20220908',

2528

'categories': ['People & Blogs'],

2529

'uploader': 'cole-dlp-test-acc',

2530

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2531

},

2532

'params': {'skip_download': True}

2533

}, {

2534

# Extractor argument: prefer translated title+description

2535

'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',

'info_dict': {

'id': 'gHKT4uU8Zng',

'ext': 'mp4',

'channel': 'cole-dlp-test-acc',

2540

'tags': [],

2541

'duration': 5,

2542

'live_status': 'not_live',

2543

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2544

'upload_date': '20220728',

2545

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2546

'view_count': int,

2547

'categories': ['People & Blogs'],

2548

'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',

2549

'title': 'dlp test video title translated (fr)',

2550

'availability': 'public',

2551

'uploader': 'cole-dlp-test-acc',

2552

'age_limit': 0,

2553

'description': 'dlp test video description translated (fr)',

2554

'playable_in_embed': True,

2555

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2556

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2557

},

2558

'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},

2559

'expected_warnings': [r'Preferring "fr" translated fields'],

2560

}, {

2561

'note': '6 channel audio',

2562

'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',

2563

'only_matching': True,

2564

}, {

2565

'note': 'Multiple HLS formats with same itag',

2566

'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',

'info_dict': {

'id': 'kX3nB4PpJko',

'ext': 'mp4',

'categories': ['Entertainment'],

2571

'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',

2572

'uploader_url': 'http://www.youtube.com/user/MrBeast6000',

2573

'live_status': 'not_live',

2574

'duration': 937,

2575

'channel_follower_count': int,

2576

'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',

2577

'title': 'Last To Take Hand Off Jet, Keeps It!',

2578

'channel': 'MrBeast',

2579

'playable_in_embed': True,

2580

'view_count': int,

2581

'upload_date': '20221112',

2582

'uploader': 'MrBeast',

2583

'uploader_id': 'MrBeast6000',

2584

'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',

2585

'age_limit': 0,

2586

'availability': 'public',

2587

'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',

'like_count': int,

'tags': [],

},

'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},

2592

}, {

2593

'note': 'Audio formats with Dynamic Range Compression',

2594

'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',

'info_dict': {

'id': 'Tq92D6wQ1mg',

'ext': 'weba',

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

2599

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

2600

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

2601

'channel_follower_count': int,

2602

'description': 'md5:17eccca93a786d51bc67646756894066',

2603

'upload_date': '20191228',

2604

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

2605

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

2606

'playable_in_embed': True,

2607

'like_count': int,

2608

'categories': ['Entertainment'],

2609

'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',

2610

'age_limit': 18,

2611

'channel': 'Projekt Melody',

2612

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

2613

'view_count': int,

2614

'availability': 'needs_auth',

2615

'comment_count': int,

2616

'live_status': 'not_live',

2617

'uploader': 'Projekt Melody',

2618

'duration': 106,

2619

},

2620

'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},

2621

},

2622

{

2623

'url': 'https://www.youtube.com/live/qVv6vCqciTM',

'info_dict': {

'id': 'qVv6vCqciTM',

'ext': 'mp4',

'age_limit': 0,

'uploader_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',

2629

'comment_count': int,

2630

'chapters': 'count:13',

2631

'upload_date': '20221223',

2632

'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',

2633

'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',

2634

'uploader_url': 'http://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',

2635

'like_count': int,

2636

'release_date': '20221223',

2637

'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],

2638

'title': '【 #インターネット女クリスマス】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',

2639

'view_count': int,

2640

'playable_in_embed': True,

2641

'duration': 4438,

2642

'availability': 'public',

2643

'channel_follower_count': int,

2644

'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',

2645

'categories': ['Entertainment'],

2646

'live_status': 'was_live',

2647

'release_timestamp': 1671793345,

2648

'channel': 'さなちゃんねる',

2649

'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',

2650

'uploader': 'さなちゃんねる',

},

},

]

_WEBPAGE_TESTS = [

# YouTube <object> embed

2657

{

2658

'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',

2659

'md5': '873c81d308b979f0e23ee7e620b312a3',

'info_dict': {

'id': 'msN87y-iEx0',

'ext': 'mp4',

'title': 'Feynman: Mirrors FUN TO IMAGINE 6',

2664

'upload_date': '20080526',

2665

'description': 'md5:873c81d308b979f0e23ee7e620b312a3',

2666

'uploader': 'Christopher Sykes',

2667

'uploader_id': 'ChristopherJSykes',

2668

'age_limit': 0,

2669

'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],

2670

'channel_id': 'UCCeo--lls1vna5YJABWAcVA',

2671

'playable_in_embed': True,

2672

'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',

2673

'like_count': int,

2674

'comment_count': int,

2675

'channel': 'Christopher Sykes',

2676

'live_status': 'not_live',

2677

'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',

2678

'availability': 'public',

2679

'duration': 195,

2680

'view_count': int,

2681

'categories': ['Science & Technology'],

2682

'channel_follower_count': int,

2683

'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',

2684

},

2685

'params': {

2686

'skip_download': True,

}

},

]

@classmethod
def suitable(cls, url):
    """Return False for URLs with a non-empty ``list`` query value; otherwise defer to the parent class.

    @param url  URL to check
    @returns    False when the first ``list`` query value is truthy, else
                whatever the parent ``suitable`` returns
    """
    # NOTE(review): this function-scope import duplicates the module-level
    # ``parse_qs`` import. It is kept on purpose - presumably this method must
    # also work when its source is used outside this module; confirm before removing.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super().suitable(url)

2699

2700

def __init__(self, *args, **kwargs):
    """Initialise the parent class, then create two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS (filled by _load_player)
    # _player_cache: cache-id tuple -> result or stored exception (filled by _cached)
    self._code_cache, self._player_cache = {}, {}

2704

2705

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach fragment generators to every format flagged ``is_from_start``.

    The selected format dicts are modified in place; nothing is returned.
    While live, ``fragments`` is set to a callable (a partial of
    ``_live_dash_fragments``) and ``protocol`` to
    ``'http_dash_segments_generator'``. Otherwise ``fragments`` becomes a
    ``LazyList`` over that generator and the ``is_from_start`` flag is removed.
    """
    manifest_lock = threading.Lock()
    start_time = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download player responses and rebuild ``formats`` once ``delay`` seconds have passed."""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict)
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            # Only the refetch itself is serialised by the lock
            with manifest_lock:
                refetch_manifest(format_id, delay)

            matched = next((fmt for fmt in formats if fmt['format_id'] == format_id), None)
            if matched:
                return matched['manifest_url'], matched['manifest_stream_number'], is_live
            if is_live:
                retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
            else:
                retry.error = f'{video_id}: Video is no longer live'
        return None

    for fmt in formats:
        fmt['is_live'] = is_live
        # The last positional argument is a copy of the format when the stream
        # is no longer live, and False while it is live.
        fragment_gen = functools.partial(
            self._live_dash_fragments, video_id, fmt['format_id'],
            live_start_time, mpd_feed, not is_live and fmt.copy())
        if not is_live:
            fmt['fragments'] = LazyList(fragment_gen({}))
            del fmt['is_from_start']
            continue
        fmt['fragments'] = fragment_gen
        fmt['protocol'] = 'http_dash_segments_generator'

2752

2753

def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """Generate fragment descriptors for a live DASH format.

    @param video_id               used for the debug message only
    @param format_id              format whose manifest is requested from ``mpd_feed``
    @param live_start_time        start time of the stream in seconds, or a falsy value
    @param mpd_feed               callable ``(format_id, delay)`` returning
                                  ``(manifest_url, stream_number, is_live)`` or None
    @param manifestless_orig_fmt  a format dict used directly instead of
                                  fetching an MPD, or a falsy value
    @param ctx                    dict; ``'start'`` is read and ``'last_error'`` is popped
    @yields                       dicts with ``'url'`` and ``'fragment_count'``
    """
    # FETCH_SPAN: minimum seconds between two polling iterations
    # MAX_DURATION: 432000 s == 120 h (see the warning below)
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # The nested helper returns this value on its failure paths. It must be
    # bound before the first call, otherwise a failed first manifest fetch
    # raises NameError instead of being retried.
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """Refresh the manifest info; return ``(should_continue, last_sequence_number)``."""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # Ask for a fresh manifest quickly when forced, or after an HTTP 403
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 2
                return False, last_seq
            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        # Give up once too many consecutive polls produced nothing new
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # The exception only serves to leave the for-loop and
                    # restart the while-loop (caught right below)
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2863

2864

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfg dicts, or None.

    ``PLAYER_JS_URL`` is tried first, then any
    ``WEB_PLAYER_CONTEXT_CONFIGS -> * -> jsUrl``. ``webpage`` is accepted but not used here.
    """
    js_path = traverse_obj(
        ytcfgs,
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', js_path) if js_path else None

2871

2872

def _download_player_url(self, video_id, fatal=False):
    """Derive the player ``base.js`` URL from the iframe API script.

    @returns the player URL, or None when the download or the version lookup yields nothing
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    # The version is the 8 hex digits following "player/" (slashes may be backslash-escaped)
    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'

2881

2882

def _signature_cache_id(self, example_sig):

2883

""" Return a string representation of a signature """

2884

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2885

2886

@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``cls._PLAYER_INFO_RE`` pattern found in ``player_url``.

    @raises ExtractorError  when none of the patterns matches
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)

2895

2896

def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS code for ``player_url``, downloading it at most once per player id.

    @returns the code, or None when nothing was downloaded and nothing is cached
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if not code:
        # Failed downloads are not cached, so a later call tries again
        return None
    self._code_cache[player_id] = code
    return code

2906

2907

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Build a function that rearranges a signature the way the player's JS does.

    The rearrangement is stored as a list of source indices ("cache spec")
    under the ``youtube-sigfuncs`` cache section. It is loaded from there when
    available; otherwise it is derived by running the player's signature
    function on a probe string.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not cache_spec:
        code = self._load_player(video_id, player_url)
        if code:
            js_sig_func = self._parse_sig_js(code)
            # Probe string whose i-th character has code point i, so the
            # output code points are the source indices
            test_string = ''.join(map(chr, range(len(example_sig))))
            cache_spec = [ord(c) for c in js_sig_func(test_string)]
            self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    def apply_spec(s):
        return ''.join(s[i] for i in cache_spec)

    return apply_spec

2926

2927

def _print_sig_code(self, func, example_sig):

2928

if not self.get_param('youtube_print_sig_code'):

2929

return

2930

2931

def gen_sig_code(idxs):

2932

def _genslice(start, end, step):

2933

starts = '' if start == 0 else str(start)

2934

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2935

steps = '' if step == 1 else (':%d' % step)

2936

return f's[{starts}{ends}{steps}]'

2937

2938

step = None

2939

# Quelch pyflakes warnings - start will be set when step is set

2940

start = '(Never used)'

2941

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2946

step = None

2947

continue

2948

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2958

2959

test_string = ''.join(map(chr, range(len(example_sig))))

2960

cache_res = func(test_string)

2961

cache_spec = [ord(c) for c in cache_res]

2962

expr_code = ' + '.join(gen_sig_code(cache_spec))

2963

signature_id_tuple = '(%s)' % (

2964

', '.join(str(len(p)) for p in example_sig.split('.')))

2965

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2966

' return %s\n') % (signature_id_tuple, expr_code)

2967

self.to_screen('Extracted signature function:\n' + code)

2968

2969

def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it as a Python callable.

    @param jscode  player JavaScript source
    @returns       a function taking the signature string and returning the
                   result of the interpreted JS function
    """
    # Every parenthesis that belongs to the JS source is matched literally
    # (backslash-escaped). A bare ``$`` outside a character class would be an
    # end-of-string anchor and make the pattern unmatchable.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The interpreted function takes its arguments as a list
    return lambda s: initial_function([s])

2992

2993

def _cached(self, func, *cache_id):

2994

def inner(*args, **kwargs):

2995

if cache_id not in self._player_cache:

2996

try:

2997

self._player_cache[cache_id] = func(*args, **kwargs)

2998

except ExtractorError as e:

2999

self._player_cache[cache_id] = e

3000

except Exception as e:

3001

self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)

3002

3003

ret = self._player_cache[cache_id]

3004

if isinstance(ret, Exception):

raise ret

return ret

return inner

def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    # The extracted function is cached per player URL and signature layout
    sig_layout = self._signature_cache_id(s)
    cached_extractor = self._cached(
        self._extract_signature_function, 'sig', player_url, sig_layout)
    sig_func = cached_extractor(video_id, player_url, s)
    self._print_sig_code(sig_func, s)
    return sig_func(s)

3016

3017

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        # First choice: the built-in JS interpreter, cached per player URL
        native_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        result = native_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as native_error:
        # Fallback: run the function body in PhantomJS. If the wrapper
        # cannot be created, the interpreter's error is re-raised.
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise native_error
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error, only_once=True)

        args, func_body = func_code
        script = f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));'
        result = jsi.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {result}')
    return result

3050

3051

def _extract_n_function_name(self, jscode):

3052

funcname, idx = self._search_regex(

3053

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

3054

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

if not idx:

return funcname

return json.loads(js_to_json(self._search_regex(

3059

rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,

3060

f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]

3061

3062

def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's "n" function.

    ``func_code`` is an ``(argument_names, body)`` pair. It is loaded from
    the ``youtube-nsig`` cache section when present; otherwise it is
    extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because JS identifiers may contain "$", which
    # would otherwise act as a regex anchor. JS parentheses are matched literally.
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
                 # NB: The end of the regex is intentionally kept strict
                 {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code

3087

3088

def _extract_n_function_from_code(self, jsi, func_code):

3089

func = jsi.extract_function_from_code(*func_code)

def extract_nsig(s):

try:

ret = func([s])

except JSInterpreter.Exception:

3095

raise

3096

except Exception as e:

3097

raise JSInterpreter.Exception(traceback.format_exc(), cause=e)

3098

3099

if ret.startswith('enhanced_except_'):

3100

raise JSInterpreter.Exception('Signature function returned an exception')

return ret

return extract_nsig

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ``ytcfg['STS']`` is preferred; otherwise the player JS is searched.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if code:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts

3128

3129

def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs found in the player responses.

    Stops with a warning at the first tracking URL that is missing.
    """
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')
    # is_full is 0 for the playback URL and 1 for the watchtime URL
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = 0
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

3169

3170

@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield URL results for YouTube videos referenced by a third-party webpage."""
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    invidious_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if invidious_link:
        yield cls.url_result(invidious_link.group('url'), cls)
        # Nothing else is extracted from such a page
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for lazy_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        yield cls.url_result(unescapeHTML(lazy_id), cls, lazy_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for groups in re.findall(importer_re, webpage):
        # The last group holds the video id; the other two are the quote characters
        yield cls.url_result(groups[-1], cls, groups[-1])

3193

3194

@classmethod
def extract_id(cls, url):
    """Return the video id obtained from ``cls.get_temp_id(url)``.

    @raises ExtractorError  when no id is obtained
    """
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

3200

3201

def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in ``data``."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )

    def start_seconds(chapter):
        # The start is given in milliseconds
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        traverse_obj(data, chapters_path, expected_type=list),
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)

3215

3216

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first engagement panel marker list that yields any, else ``[]``."""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

3228

3229

def _extract_chapters_from_description(self, description, duration):
    """Parse chapter lines out of a description.

    Lines of the form "<time> <title>" are tried first. If they give no
    chapters, lines of the form "<title> <time>" are tried.
    """
    duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
    sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
    text = description or ''

    time_first = self._extract_chapters(
        re.findall(sep_re % (duration_re, r'.+?'), text),
        chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
        duration=duration, strict=False)
    if time_first:
        return time_first

    return self._extract_chapters(
        re.findall(sep_re % (r'.+?', duration_re), text),
        chapter_time=lambda x: parse_duration(x[1]), chapter_title=lambda x: x[0],
        duration=duration, strict=False)

3239

3240

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

3245

'title': chapter_title(chapter),

3246

} for chapter in chapter_list or []]

3247

if not strict:

3248

chapter_list.sort(key=lambda c: c['start_time'] or 0)

3249

3250

chapters = [{'start_time': 0}]

3251

for idx, chapter in enumerate(chapter_list):

3252

if chapter['start_time'] is None:

3253

self.report_warning(f'Incomplete chapter {idx}')

3254

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

3255

chapters.append(chapter)

3256

elif chapter not in chapters:

3257

self.report_warning(

3258

f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')

3259

return chapters[1:]

3260

3261

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    @param comment_renderer  dict describing one comment
    @param parent            id of the parent comment; ``'root'`` is used when falsy
    @returns                 the comment dict, or None when the renderer has no ``commentId``
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    timestamp = self._parse_time_text(time_text)

    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    vote_text = try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']), str)
    votes = parse_count(vote_text) or 0

    author_thumbnail = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)

    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root',
    }

3297

3298

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts for a comment section or for one reply thread.

    @param root_continuation_data  renderer from which the first continuation is extracted
    @param ytcfg                   ytcfg passed on to the API request helpers
    @param video_id                video id, used for forced continuations and warnings
    @param parent                  id of the parent comment when extracting
                                   replies; None for the top-level section
    @param tracker                 dict of counters shared across recursive
                                   calls; created when not given
    @yields                        comment dicts as built by ``_extract_comment``
    @raises self.CommentsDisabled  when a top-level call yielded no comments
                                   and the root data carries a message
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Read the comment count and return the continuation for the requested sort order."""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield comments (and their replies) from one page; a bare ``yield`` tells the caller to stop."""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # Stop signal: the consumer returns on a falsy entry
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Missing or non-numeric limits default to sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
                self.report_warning(
                    'Received incomplete data for a comment reply thread and retrying did not help. '
                    'Ignoring to let other comments be downloaded.')
                # Abandon this reply thread here. Falling through would
                # re-process the previous page's response: duplicate replies
                # and the same continuation requested again.
                return
            else:
                raise
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section of a top-level request is the header
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled

3454

3455

@staticmethod

3456

def _generate_comment_continuation(video_id):

3457

"""

3458

Generates initial comment section continuation token from given video id

3459

"""

3460

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

3461

return base64.b64encode(token.encode()).decode()

3462

3463

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _real_comment_extract(contents):
        # Use the first item section identified as the comment section (None if absent)
        section_renderers = traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
        renderer = None
        for item in section_renderers:
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # The first "max_comments" value caps the total; None means no cap
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, max_comments)

3473

3474

@staticmethod
def _get_checkok_params():
    """Return a new dict with the 'contentCheckOk' and 'racyCheckOk' flags set to True"""
    return dict(contentCheckOk=True, racyCheckOk=True)

3477

3478

@classmethod
def _generate_player_context(cls, sts=None):
    """
    Build the playback context part of a player query

    The content playback context always carries 'html5Preference' and, when
    `sts` is not None, the signature timestamp as 'signatureTimestamp'.
    The flags from _get_checkok_params() are merged into the top level.
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {'contentPlaybackContext': playback_context},
    }
    player_context.update(cls._get_checkok_params())
    return player_context

@staticmethod
def _is_agegated(player_response):
    """
    Tell whether a player response indicates an age-gated video

    Only the 'playabilityStatus' of the player response is inspected: either
    'desktopLegacyAgeGateReason' is set, or its 'status'/'reason' contains one
    of the known age gate markers (compared case-insensitively).
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason'))) or []
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Statuses are upper-case identifiers (cf. 'UNPLAYABLE') and reason texts may be
    # capitalized, so the lower-case markers must be matched case-insensitively.
    # Values that are not strings cannot contain a marker and are ignored
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)

3503

3504

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the player response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3507

3508

# Value sent as 'params' in the player API query (for stories and the android client)
# and as 'pp' in the watch page query (for stories)
_STORY_PLAYER_PARAMS = '8AEB'

3509

3510

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Request the player API response of a video for the given client

    The signature timestamp is only extracted when a player URL is given.
    Stories and the android client additionally send _STORY_PLAYER_PARAMS
    as 'params'. Returns the response, or None if it is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    account_syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    signature_timestamp = None
    if player_url:
        signature_timestamp = self._extract_signature_timestamp(
            video_id, player_url, master_ytcfg, fatal=False)
    api_headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=account_syncid, session_index=session_index, default_client=client)

    needs_story_params = smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android'
    player_query = {'videoId': video_id}
    if needs_story_params:
        player_query['params'] = self._STORY_PLAYER_PARAMS
    player_query.update(self._generate_player_context(signature_timestamp))

    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=player_query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None

3531

3532

def _get_requested_clients(self, url, smuggled_data):
    """
    Determine the player clients to query, in order and without duplicates

    The clients come from the 'player_client' extractor argument:
    names of non-private clients (not starting with '_') are taken as is,
    'default' expands to the default clients and 'all' to every non-private
    client sorted by descending priority. Unsupported names are reported
    and skipped. Without any valid request the default clients are used.
    For music URLs the existing '<client>_music' variants are appended.
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Collect the additions first: extending the list from a generator that
        # iterates over the very same list would also visit the new entries
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3555

3556

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect the player responses of a video from the given clients

    The clients are processed in the given order. Further clients may get
    queued on the way when a response is unplayable or age-gated.
    Returns a tuple (list of player responses, player URL).
    If a client failed with an ExtractorError and no player response was
    collected at all, the last such error is raised; otherwise the errors
    are only reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that clients.pop() below yields the clients in their given order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        # Only the web client uses the ytcfg of the webpage directly
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # The player URL is kept across clients once it was found
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback download of the player URL is attempted at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # The initial player response of the webpage is reused for the web client
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is held back; earlier ones are reported right away
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        # The held back error is fatal only if nothing at all was collected
        if not len(prs):
            raise last_error
        self.report_warning(last_error)

    return prs, player_url

3637

3638

def _needs_live_processing(self, live_status, duration):
    """
    Return `live_status` if the video needs live processing, else None

    That is the case for a live video when the 'live_from_start' param is
    set, and for a post-live video longer than 4 hours.
    """
    if live_status == 'is_live':
        wanted = self.get_param('live_from_start')
    elif live_status == 'post_live':
        wanted = (duration or 0) > 4 * 3600
    else:
        wanted = False
    return live_status if wanted else None

3642

3643

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """
    Generate the formats of a video followed by its manifest subtitles

    Yields one dict per format: first the formats listed directly in the
    streaming data, then the formats of the HLS and DASH manifests unless
    these are skipped. The very last item yielded is the dict of subtitles
    collected from the manifests.
    """
    # itags: (protocol, language) pairs already seen per itag
    # stream_ids: identifiers of the direct formats already yielded
    itags, stream_ids = collections.defaultdict(set), []
    # Qualities seen per itag and per height; used to rate the manifest formats
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # Without a plain URL, the URL and the encrypted signature
            # are taken from the query string in 'signatureCipher'
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is kept with its original URL, but marked and deprioritized
                throttled = True

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        # 10 for the default audio track, -10 for descriptive tracks, -1 otherwise
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        # NOTE(review): quality is still None below if the format has neither 'quality'
        # nor 'audioQuality'; quality.replace() would then fail unless 'qualityLabel' is set - confirm
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
                'DRC' if fmt.get('isDrc') else None,
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            # DRC formats are rated half a step below their regular counterpart
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else '') or None,
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        # Split the mime type into the type proper and the optional codecs
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is that of the one stream
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            CHUNK_SIZE = 10 << 20
            # With a known file size, the format is split into fragments
            # requested by byte range, each covering at most CHUNK_SIZE bytes
            dct.update({
                'protocol': 'http_dash_segments',
                'fragments': [{
                    'url': update_url_query(dct['url'], {
                        'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, dct["filesize"])}'
                    })
                } for range_start in range(0, dct['filesize'], CHUNK_SIZE)]
            } if dct['filesize'] else {
                'downloader_options': {'http_chunk_size': CHUNK_SIZE}  # No longer useful?
            })

            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'

        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, itag):
        """Set format_id and quality of a manifest format; False if it duplicates a seen format"""
        key = (proto, f.get('language'))
        if key in itags[itag]:
            return False
        itags[itag].add(key)

        # The protocol is appended only when the itag was seen with another protocol too
        if any(p != proto for p, _ in itags[itag]):
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        # Fall back to the quality recorded for the closest known height
        if f['quality'] == -1 and f.get('height'):
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                # The itag of an HLS format is taken from its URL
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    # The file size is taken from the '/clen/' part of the URL
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The subtitles are always the final item
    yield subtitles

def _extract_storyboard(self, player_responses, duration):
    """
    Generate the storyboard formats described by the player responses

    The first storyboard spec found is a '|' separated string: the first
    field is the base URL template, every further field describes one
    storyboard level as 8 values separated by '#'. Malformed levels are
    reported and skipped. Nothing is generated without a usable base URL.

    The frame rate and the fragment durations are derived from `duration`.
    Without a known, non-zero duration they cannot be computed, so no
    storyboards are generated instead of failing the whole extraction.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The spec got reversed, so pop() takes its first field: the base URL
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    max_level = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        if not duration:
            # Dividing by/through a missing or zero duration would raise
            self.write_debug('Skipping storyboards since the video duration is unknown', only_once=True)
            return
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(max_level - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a grid of cols * rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3895

3896

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """
    Download the watch page (unless skipped) and the player responses

    The webpage is not downloaded when 'webpage' is listed in the
    'player_skip' extractor argument; it is None then, as well as when the
    non-fatal download fails to return one.
    Returns a tuple (webpage, master ytcfg, player responses, player URL).
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None
    if not skip_webpage:
        webpage_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            webpage_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=webpage_query)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url

3912

3913

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """
    Determine the live status of a video and extract its formats

    The live status is derived from the video details, with the live
    broadcast details of the microformats as fallback for 'is live'.
    Returns a tuple
    (live broadcast details, live status, streaming data, formats, subtitles).
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The checks are ordered by precedence
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The generator yields all the formats and, as its final item, the subtitles
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    return live_broadcast_details, live_status, streaming_data, formats, subtitles

3931

3932

def _real_extract(self, url):

3933

url, smuggled_data = unsmuggle_url(url, {})

3934

video_id = self._match_id(url)

3935

3936

base_url = self.http_scheme() + '//www.youtube.com/'

3937

webpage_url = base_url + 'watch?v=' + video_id

3938

3939

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3940

3941

playability_statuses = traverse_obj(

3942

player_responses, (..., 'playabilityStatus'), expected_type=dict)

3943

3944

trailer_video_id = get_first(

3945

playability_statuses,

3946

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3947

expected_type=str)

3948

if trailer_video_id:

3949

return self.url_result(

3950

trailer_video_id, self.ie_key(), trailer_video_id)

3951

3952

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3953

if webpage else (lambda x: None))

3954

3955

video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)

3956

microformats = traverse_obj(

3957

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3958

expected_type=dict)

3959

3960

translated_title = self._get_text(microformats, (..., 'title'))

3961

video_title = (self._preferred_lang and translated_title

3962

or get_first(video_details, 'title') # primary

3963

or translated_title

3964

or search_meta(['og:title', 'twitter:title', 'title']))

3965

translated_description = self._get_text(microformats, (..., 'description'))

3966

original_description = get_first(video_details, 'shortDescription')

3967

video_description = (

3968

self._preferred_lang and translated_description

3969

# If original description is blank, it will be an empty string.

3970

# Do not prefer translated description in this case.

3971

or original_description if original_description is not None else translated_description)

3972

3973

multifeed_metadata_list = get_first(

3974

player_responses,

3975

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3976

expected_type=str)

3977

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3978

if self.get_param('noplaylist'):

3979

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3984

# Unquote should take place before split on comma (,) since textual

3985

# fields may contain comma as well (see

3986

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3987

feed_data = urllib.parse.parse_qs(

3988

urllib.parse.unquote_plus(feed))

3989

3990

def feed_entry(name):

3991

return try_get(

3992

feed_data, lambda x: x[name][0], str)

3993

3994

feed_id = feed_entry('id')

3995

if not feed_id:

3996

continue

3997

feed_title = feed_entry('title')

3998

title = video_title

3999

if feed_title:

4000

title += ' (%s)' % feed_title

4001

entries.append({

4002

'_type': 'url_transparent',

4003

'ie_key': 'Youtube',

4004

'url': smuggle_url(

4005

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

4006

{'force_singlefeed': True}),

4007

'title': title,

4008

})

4009

feed_ids.append(feed_id)

4010

self.to_screen(

4011

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

4012

% (', '.join(feed_ids), video_id))

4013

return self.playlist_result(

4014

entries, video_id, video_title, video_description)

4015

4016

duration = (int_or_none(get_first(video_details, 'lengthSeconds'))

4017

or int_or_none(get_first(microformats, 'lengthSeconds'))

4018

or parse_duration(search_meta('duration')) or None)

4019

4020

live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \

4021

self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

4022

if live_status == 'post_live':

4023

self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

4024

4025

if not formats:

4026

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

4027

self.report_drm(video_id)

4028

pemr = get_first(

4029

playability_statuses,

4030

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

4031

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

4032

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

4033

if subreason:

4034

if subreason == 'The uploader has not made this video available in your country.':

4035

countries = get_first(microformats, 'availableCountries')

4036

if not countries:

4037

regions_allowed = search_meta('regionsAllowed')

4038

countries = regions_allowed.split(',') if regions_allowed else None

4039

self.raise_geo_restricted(subreason, countries, metadata_available=True)

4040

reason += f'. {subreason}'

4041

if reason:

4042

self.raise_no_formats(reason, expected=True)

4043

4044

keywords = get_first(video_details, 'keywords', expected_type=list) or []

4045

if not keywords and webpage:

4046

keywords = [

4047

unescapeHTML(m.group('content'))

4048

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

4049

for keyword in keywords:

4050

if keyword.startswith('yt:stretch='):

4051

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

4052

if mobj:

4053

# NB: float is intentional for forcing float division

4054

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

4059

f['stretched_ratio'] = ratio

4060

break

4061

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

4062

thumbnail_url = search_meta(['og:image', 'twitter:image'])

4063

if thumbnail_url:

4064

thumbnails.append({

4065

'url': thumbnail_url,

4066

})

4067

original_thumbnails = thumbnails.copy()

4068

4069

# The best resolution thumbnails sometimes does not appear in the webpage

4070

# See: https://github.com/yt-dlp/yt-dlp/issues/340

4071

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

4072

thumbnail_names = [

4073

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

4074

# in resolution, these are not the custom thumbnail. So de-prioritize them

4075

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

4076

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

4077

]

4078

n_thumbnail_names = len(thumbnail_names)

4079

thumbnails.extend({

4080

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

4081

video_id=video_id, name=name, ext=ext,

4082

webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),

4083

} for name in thumbnail_names for ext in ('webp', 'jpg'))

4084

for thumb in thumbnails:

4085

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

4086

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

4087

self._remove_duplicate_formats(thumbnails)

4088

self._downloader._sort_thumbnails(original_thumbnails)

4089

4090

category = get_first(microformats, 'category') or search_meta('genre')

4091

channel_id = str_or_none(

4092

get_first(video_details, 'channelId')

4093

or get_first(microformats, 'externalChannelId')

4094

or search_meta('channelId'))

4095

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

4096

4097

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

4098

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

4099

if not duration and live_end_time and live_start_time:

4100

duration = live_end_time - live_start_time

4101

4102

needs_live_processing = self._needs_live_processing(live_status, duration)

4103

4104

def is_bad_format(fmt):

4105

if needs_live_processing and not fmt.get('is_from_start'):

4106

return True

4107

elif (live_status == 'is_live' and needs_live_processing != 'is_live'

4108

and fmt.get('protocol') == 'http_dash_segments'):

4109

return True

4110

4111

for fmt in filter(is_bad_format, formats):

4112

fmt['preference'] = (fmt.get('preference') or -1) - 10

4113

fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')

4114

4115

if needs_live_processing:

4116

self._prepare_live_from_start_formats(

4117

formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

4118

4119

formats.extend(self._extract_storyboard(player_responses, duration))

info = {

'id': video_id,

'title': video_title,

4124

'formats': formats,

4125

'thumbnails': thumbnails,

4126

# The best thumbnail that we are sure exists. Prevents unnecessary

4127

# URL checking if user don't care about getting the best possible thumbnail

4128

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

4129

'description': video_description,

4130

'uploader': get_first(video_details, 'author'),

4131

'uploader_id': self._search_regex(r'/(?:channel/|user/|(?=@))([^/?&#]+)', owner_profile_url, 'uploader id', default=None),

4132

'uploader_url': owner_profile_url,

4133

'channel_id': channel_id,

4134

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

4135

'duration': duration,

4136

'view_count': int_or_none(

4137

get_first((video_details, microformats), (..., 'viewCount'))

4138

or search_meta('interactionCount')),

4139

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

4140

'age_limit': 18 if (

4141

get_first(microformats, 'isFamilySafe') is False

4142

or search_meta('isFamilyFriendly') == 'false'

4143

or search_meta('og:restrictions:age') == '18+') else 0,

4144

'webpage_url': webpage_url,

4145

'categories': [category] if category else None,

4146

'tags': keywords,

4147

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

4148

'live_status': live_status,

4149

'release_timestamp': live_start_time,

4150

'_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats

4151

'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')

}

subtitles = {}

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

4156

if pctr:

4157

def get_lang_code(track):

4158

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

4159

or track.get('languageCode'))

4160

4161

# Converted into dicts to remove duplicates

4162

captions = {

4163

get_lang_code(sub): sub

4164

for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}

4165

translation_languages = {

4166

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

4167

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}

4168

4169

def process_language(container, base_url, lang_code, sub_name, query):

4170

lang_subs = container.setdefault(lang_code, [])

4171

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

# NB: Constructing the full subtitle dictionary is slow

4182

get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (

4183

self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

4184

for lang_code, caption_track in captions.items():

4185

base_url = caption_track.get('baseUrl')

4186

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

4187

if not base_url:

4188

continue

4189

lang_name = self._get_text(caption_track, 'name', max_runs=1)

4190

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

4195

if not caption_track.get('isTranslatable'):

4196

continue

4197

for trans_code, trans_name in translation_languages.items():

4198

if not trans_code:

4199

continue

4200

orig_trans_code = trans_code

4201

if caption_track.get('kind') != 'asr' and trans_code != 'und':

4202

if not get_translated_subs:

4203

continue

4204

trans_code += f'-{lang_code}'

4205

trans_name += format_field(lang_name, None, ' from %s')

4206

# Add an "-orig" label to the original language so that it can be distinguished.

4207

# The subs are returned without "-orig" as well for compatibility

4208

if lang_code == f'a-{orig_trans_code}':

4209

process_language(

4210

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

4211

# Setting tlang=lang returns damaged subtitles.

4212

process_language(automatic_captions, base_url, trans_code, trans_name,

4213

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

4214

4215

info['automatic_captions'] = automatic_captions

4216

info['subtitles'] = subtitles

4217

4218

parsed_url = urllib.parse.urlparse(url)

4219

for component in [parsed_url.fragment, parsed_url.query]:

4220

query = urllib.parse.parse_qs(component)

4221

for k, v in query.items():

4222

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

4223

d_k += '_time'

4224

if d_k not in info and k in s_ks:

4225

info[d_k] = parse_duration(query[k][0])

4226

4227

# Youtube Music Auto-generated description

4228

if video_description:

4229

mobj = re.search(

4230

r'''(?xs)

4231

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

4232

(?P<album>[^\n]+)

4233

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

4234

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

4235

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

4236

.+\nAuto-generated\ by\ YouTube\.\s*$

4237

''', video_description)

4238

if mobj:

4239

release_year = mobj.group('release_year')

4240

release_date = mobj.group('release_date')

4241

if release_date:

4242

release_date = release_date.replace('-', '')

4243

if not release_year:

4244

release_year = release_date[:4]

4245

info.update({

4246

'album': mobj.group('album'.strip()),

4247

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

4248

'track': mobj.group('track').strip(),

4249

'release_date': release_date,

4250

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

4256

if not initial_data:

4257

query = {'videoId': video_id}

4258

query.update(self._get_checkok_params())

4259

initial_data = self._extract_response(

4260

item_id=video_id, ep='next', fatal=False,

4261

ytcfg=master_ytcfg, query=query,

4262

headers=self.generate_api_headers(ytcfg=master_ytcfg),

4263

note='Downloading initial data API JSON')

4264

4265

info['comment_count'] = traverse_obj(initial_data, (

4266

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

4267

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

4268

), (

4269

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

4270

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

4271

), expected_type=int_or_none, get_all=False)

4272

4273

try: # This will error if there is no livechat

4274

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

4275

except (KeyError, IndexError, TypeError):

4276

pass

4277

else:

4278

info.setdefault('subtitles', {})['live_chat'] = [{

4279

# url is needed to set cookies

4280

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

4281

'video_id': video_id,

4282

'ext': 'json',

4283

'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')

4284

else 'youtube_live_chat_replay'),

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

4290

or self._extract_chapters_from_engagement_panel(initial_data, duration)

4291

or self._extract_chapters_from_description(video_description, duration)

4292

or None)

4293

4294

contents = traverse_obj(

4295

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

4296

expected_type=list, default=[])

4297

4298

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

4299

if vpir:

4300

stl = vpir.get('superTitleLink')

4301

if stl:

4302

stl = self._get_text(stl)

4303

if try_get(

4304

vpir,

4305

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

4306

info['location'] = stl

4307

else:

4308

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

4309

if mobj:

4310

info.update({

4311

'series': mobj.group(1),

4312

'season_number': int(mobj.group(2)),

4313

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

list) or []):

tbrs = variadic(

traverse_obj(

tlb, ('toggleButtonRenderer', ...),

4322

('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))

4323

for tbr in tbrs:

4324

for getter, regex in [(

4325

lambda x: x['defaultText']['accessibility']['accessibilityData'],

4326

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

4327

lambda x: x['accessibility'],

4328

lambda x: x['accessibilityData']['accessibilityData'],

4329

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

4330

label = (try_get(tbr, getter, dict) or {}).get('label')

4331

if label:

4332

mobj = re.match(regex, label)

4333

if mobj:

4334

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

4335

break

4336

sbr_tooltip = try_get(

4337

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

4338

if sbr_tooltip:

4339

like_count, dislike_count = sbr_tooltip.split(' / ')

4340

info.update({

4341

'like_count': str_to_int(like_count),

4342

'dislike_count': str_to_int(dislike_count),

4343

})

4344

vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))

4345

if vcr:

4346

vc = self._get_count(vcr, 'viewCount')

4347

# Upcoming premieres with waiting count are treated as live here

4348

if vcr.get('isLive'):

4349

info['concurrent_view_count'] = vc

4350

elif info.get('view_count') is None:

4351

info['view_count'] = vc

4352

4353

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

4354

if vsir:

4355

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

4356

info.update({

4357

'channel': self._get_text(vor, 'title'),

4358

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

4363

list) or []

4364

multiple_songs = False

4365

for row in rows:

4366

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

4367

multiple_songs = True

4368

break

4369

for row in rows:

4370

mrr = row.get('metadataRowRenderer') or {}

4371

mrr_title = mrr.get('title')

4372

if not mrr_title:

4373

continue

4374

mrr_title = self._get_text(mrr, 'title')

4375

mrr_contents_text = self._get_text(mrr, ('contents', 0))

4376

if mrr_title == 'License':

4377

info['license'] = mrr_contents_text

4378

elif not multiple_songs:

4379

if mrr_title == 'Album':

4380

info['album'] = mrr_contents_text

4381

elif mrr_title == 'Artist':

4382

info['artist'] = mrr_contents_text

4383

elif mrr_title == 'Song':

4384

info['track'] = mrr_contents_text

4385

4386

fallbacks = {

4387

'channel': 'uploader',

4388

'channel_id': 'uploader_id',

4389

'channel_url': 'uploader_url',

4390

}

4391

4392

# The upload date for scheduled, live and past live streams / premieres in microformats

4393

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

4394

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

4395

upload_date = (

4396

unified_strdate(get_first(microformats, 'uploadDate'))

4397

or unified_strdate(search_meta('uploadDate')))

4398

if not upload_date or (

4399

live_status in ('not_live', None)

4400

and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])

4401

):

4402

upload_date = strftime_or_none(

4403

self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date

4404

info['upload_date'] = upload_date

4405

4406

for to, frm in fallbacks.items():

4407

if not info.get(to):

4408

info[to] = info.get(frm)

4409

4410

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))

4416

4417

is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)

4418

or get_first(video_details, 'isPrivate', expected_type=bool))

4419

4420

info['availability'] = (

4421

'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)

4422

else self._availability(

4423

is_private=is_private,

4424

needs_premium=(

4425

self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)

4426

or False if initial_data and is_private is not None else None),

4427

needs_subscription=(

4428

self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)

4429

or False if initial_data and is_private is not None else None),

4430

needs_auth=info['age_limit'] >= 18,

4431

is_unlisted=None if is_private is None else (

4432

self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)

4433

or get_first(microformats, 'isUnlisted', expected_type=bool))))

4434

4435

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

4436

4437

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

4443

@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method so smuggled data is passed on to its URL results.

    The decorated method is called as ``func(self, url, smuggled_data)`` while
    the returned wrapper keeps the ``(self, url)`` signature. When `url`
    satisfies ``self.is_music_url``, an ``is_music_url`` flag is added to the
    smuggled data before `func` is called.
    """
    forwardable_types = ('url', 'url_transparent')
    youtube_hosts = ('www.youtube.com', 'music.youtube.com')

    def _smuggle(info, smuggled_data):
        # Results that do not point at another URL are returned untouched
        if info.get('_type') in forwardable_types:
            if smuggled_data.get('is_music_url'):
                parts = urllib.parse.urlparse(info['url'])
                if parts.netloc in youtube_hosts:
                    # The flag is encoded in the hostname, so it is dropped from the smuggled data
                    smuggled_data.pop('is_music_url')
                    info['url'] = urllib.parse.urlunparse(parts._replace(netloc='music.youtube.com'))
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = func(self, url, smuggled_data)
        if not smuggled_data:
            return info_dict
        _smuggle(info_dict, smuggled_data)
        if info_dict.get('entries'):
            # Every entry gets its own copy because _smuggle may pop keys from it
            info_dict['entries'] = (_smuggle(i, smuggled_data.copy()) for i in info_dict['entries'])
        return info_dict

    return wrapper

@staticmethod

def _extract_basic_item_renderer(item):

4472

# Modified from _extract_grid_item_renderer

4473

known_basic_renderers = (

4474

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

4475

)

4476

for key, renderer in item.items():

4477

if not isinstance(renderer, dict):

4478

continue

4479

elif key in known_basic_renderers:

4480

return renderer

4481

elif key.startswith('grid') and key.endswith('Renderer'):

4482

return renderer

4483

4484

def _extract_channel_renderer(self, renderer):
    """Build a 'url' result dict for the channel described by `renderer`.

    Raises KeyError when `renderer` has no 'channelId'.
    """
    channel_id = renderer['channelId']
    channel_title = self._get_text(renderer, 'title')
    channel_url = f'https://www.youtube.com/channel/{channel_id}'

    result = {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': channel_title,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': channel_title,
    }
    # Remaining fields are read from the renderer through the shared helpers
    result['channel_follower_count'] = self._get_count(renderer, 'subscriberCountText')
    result['thumbnails'] = self._extract_thumbnails(renderer, 'thumbnail')
    result['playlist_count'] = self._get_count(renderer, 'videoCountText')
    result['description'] = self._get_text(renderer, 'descriptionSnippet')
    return result

4502

4503

def _grid_entries(self, grid_renderer):

4504

for item in grid_renderer['items']:

4505

if not isinstance(item, dict):

4506

continue

4507

renderer = self._extract_basic_item_renderer(item)

4508

if not isinstance(renderer, dict):

4509

continue

4510

title = self._get_text(renderer, 'title')

4511

4512

# playlist

4513

playlist_id = renderer.get('playlistId')

4514

if playlist_id:

4515

yield self.url_result(

4516

'https://www.youtube.com/playlist?list=%s' % playlist_id,

4517

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

4522

if video_id:

4523

yield self._extract_video(renderer)

4524

continue

4525

# channel

4526

channel_id = renderer.get('channelId')

4527

if channel_id:

4528

yield self._extract_channel_renderer(renderer)

4529

continue

4530

# generic endpoint URL support

4531

ep_url = urljoin('https://www.youtube.com/', try_get(

4532

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

4533

str))

4534

if ep_url:

4535

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

4536

if ie.suitable(ep_url):

4537

yield self.url_result(

4538

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

4539

break

4540

4541

def _music_reponsive_list_entry(self, renderer):
    """Return a music.youtube.com URL result for a music list item, or None.

    Precedence: the item's own video, then the playlist of its watch endpoint
    (with the endpoint's video when it has one), then its browse endpoint.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={item_video_id}',
                               ie=YoutubeIE.ie_key(), video_id=item_video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)

4558

4559

def _shelf_entries_from_content(self, shelf_renderer):

4560

content = shelf_renderer.get('content')

4561

if not isinstance(content, dict):

4562

return

4563

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

4564

if renderer:

4565

# TODO: add support for nested playlists so each shelf is processed

4566

# as separate playlist

4567

# TODO: this includes only first N items

4568

yield from self._grid_entries(renderer)

4569

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf endpoint, then the shelf's embedded entries.

    When `skip_channels` is true and the shelf URL contains '/channels?',
    nothing is yielded at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Checking that
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work, hence the URL substring test
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

4589

4590

def _playlist_entries(self, video_list_renderer):

4591

for content in video_list_renderer['contents']:

4592

if not isinstance(content, dict):

4593

continue

4594

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

4595

if not isinstance(renderer, dict):

4596

continue

4597

video_id = renderer.get('videoId')

4598

if not video_id:

4599

continue

4600

yield self._extract_video(renderer)

4601

4602

def _rich_entries(self, rich_grid_renderer):
    """Yield the video entry held by a rich item, if it has one with a 'videoId'."""
    # The first of 'videoRenderer' / 'reelItemRenderer' found under 'content' is used
    renderer = traverse_obj(
        rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)

4609

4610

def _video_entry(self, video_renderer):

4611

video_id = video_renderer.get('videoId')

4612

if video_id:

4613

return self._extract_video(video_renderer)

4614

4615

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for the page a hashtag tile links to, or None without a URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    hashtag = self._get_text(hashtag_tile_renderer, 'hashtag')
    return self.url_result(url, ie=YoutubeTabIE.ie_key(), title=hashtag)

4621

4622

def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a backstage post.

    In order: the attached video, the attached playlist, and every link in the
    post text that `YoutubeIE` accepts, except a link to the attached video.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = video_renderer.get('videoId')
    attached_video = self._extract_video(video_renderer) if video_id else None
    if attached_video:
        yield attached_video

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video has already been handled above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)

4657

4658

def _post_thread_continuation_entries(self, post_thread_continuation):

4659

contents = post_thread_continuation.get('contents')

4660

if not isinstance(contents, list):

4661

return

4662

for content in contents:

4663

renderer = content.get('backstagePostThreadRenderer')

4664

if isinstance(renderer, dict):

4665

yield from self._post_thread_entries(renderer)

4666

continue

4667

renderer = content.get('videoRenderer')

4668

if isinstance(renderer, dict):

4669

yield self._video_entry(renderer)

4670

4671

r''' # unused

4672

def _rich_grid_entries(self, contents):

4673

for content in contents:

4674

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4675

if video_renderer:

4676

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _report_history_entries(self, renderer):
    """Yield a `YoutubeIE` URL result for every link in a report history table renderer."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for url in traverse_obj(renderer, link_path):
        # `url` is joined onto the YouTube origin before being handed on
        yield self.url_result(urljoin('https://www.youtube.com', url), YoutubeIE)

4687

4688

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in ``parent_renderer['contents']``.

    `continuation_list` is a one-element list used as an output parameter. It
    is reset to ``[None]`` when iteration starts and then updated in place with
    the result of `_extract_continuation` for the renderers that were
    processed, so the caller can read ``continuation_list[0]`` once this
    generator is exhausted.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries.
    # It does not depend on the item being processed, so it is built only once.
    known_renderers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            if content.get('richItemRenderer'):
                yield from self._rich_entries(content['richItemRenderer'])
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                # Handlers may return None placeholders; only real entries are passed on
                for entry in known_renderers[key](renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of an item is processed
                break

        # Fall back to the section itself when its items gave no continuation
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # Last resort: the continuation of the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4740

4741

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of `tab`, requesting continuation pages until none is left.

    The entries of the tab's 'sectionListRenderer' or 'richGridRenderer' are
    yielded first. After that, each continuation is requested through
    `_extract_response` and dispatched on the keys of the first continuation item.
    Iteration ends when there is no continuation, no response, or no known
    renderer in the response.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # `continuation_list` is looked up at call time, so the list rebound
        # inside the page loop below is the one that gets filled
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    page_numbers = itertools.count(1)
    while continuation:
        page_num = next(page_numbers)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # key of the first continuation item -> (handler, key to wrap the items under)
        known_renderers = {
            'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
            'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
            'playlistVideoListContinuation': (self._playlist_entries, None),
            'gridContinuation': (self._grid_entries, None),
            'itemSectionContinuation': (self._post_thread_continuation_entries, None),
            'sectionListContinuation': (extract_entries, None),  # for feeds
        }

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item:
            handler = known_renderers.get(key)
            if handler is None:
                continue
            func, parent_key = handler
            if parent_key:
                video_items_renderer = {parent_key: continuation_items}
            else:
                video_items_renderer = continuation_items
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        # Nothing recognisable in this page: stop instead of re-requesting it
        if not video_items_renderer:
            break

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4806

for tab_renderer in tabs:

4807

if tab_renderer.get('selected'):

4808

return tab_renderer

4809

if fatal:

4810

raise ExtractorError('Unable to find selected tab')

4811

4812

@staticmethod
def _extract_tab_renderers(response):
    """Return the 'tabRenderer' / 'expandableTabRenderer' dicts of a browse response."""
    tab_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_path, expected_type=dict)

4816

4817

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Return a playlist result for the selected tab of `tabs`.

    The metadata comes from `_extract_metadata_from_tabs`; the selected tab's
    'title' and 'expandedText' are appended to the title when present.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)

    selected_tab = self._extract_selected_tab(tabs)
    for tab_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, tab_field, ' - %s')

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    entries = self._entries(selected_tab, metadata['id'], ytcfg, account_syncid, visitor_data)
    return self.playlist_result(entries, **metadata)

4830

4831

def _extract_metadata_from_tabs(self, item_id, data):
    """Collect channel/playlist level metadata from the browse response `data`.

    Returns an info dict whose 'id' is `item_id`, or the channel's 'externalId'
    when the channel metadata provides one.
    """
    info = {'id': item_id}

    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if metadata_renderer:
        info['uploader'] = metadata_renderer.get('title')
        info['uploader_id'] = metadata_renderer.get('externalId')
        info['uploader_url'] = metadata_renderer.get('channelUrl')
        if info['uploader_id']:
            info['id'] = info['uploader_id']
    else:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _append_uncropped(thumbnails, thumbnail_id, preference):
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    # The cropped banners are demoted before the uncropped one is appended
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    title = (
        traverse_obj(metadata_renderer, 'title')
        or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
        or info['id'])
    info.update({
        'title': title,
        'availability': self._extract_availability(data),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_text = (
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    last_updated_unix = self._parse_time_text(last_updated_text)
    info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')

    view_count = self._get_count(playlist_stats, 1)
    if view_count is None:  # 0 is allowed
        view_count = self._get_count(playlist_header_renderer, 'viewCountText')
    if view_count is None:
        view_count = self._get_count(data, (
            'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
            'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))
    info['view_count'] = view_count

    playlist_count = self._get_count(playlist_stats, 0)
    if playlist_count is None:  # 0 is allowed
        playlist_count = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
    info['playlist_count'] = playlist_count

    if not info.get('uploader_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            # Reduce a "by X and N others" owner text to X
            'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'uploader_id': browse_ep.get('browseId'),
            'uploader_url': urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))
        })

    # The channel fields mirror the uploader fields
    for channel_key, uploader_key in (
            ('channel', 'uploader'), ('channel_id', 'uploader_id'), ('channel_url', 'uploader_url')):
        info[channel_key] = info[uploader_key]
    return info

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline (watch page) playlist, requesting further
    pages from the 'next' endpoint until no new entries are returned.

    @param playlist: playlist renderer dict holding the 'contents' of the first page
    @param playlist_id: playlist ID; sent in the API query and shown in progress messages
    @param data: initial response, passed on to the account sync ID / visitor data extractors
    @param ytcfg: ytcfg passed on to the header generation and API requests
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last entry already yielded;
        # if that entry is not on this page, start from the beginning
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The lookup yields nothing when the last item is not a playlistPanelVideoRenderer;
        # fall back to an empty dict so that the defaults in the query below are used
        # instead of failing on `None.get`
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # The response carries no playlist renderer, so there is nothing left to page through
            return

4968

4969

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist renderer found in a response.

    Returns a url_result for the playlist page when the renderer links to a
    URL different from `url` (and the playlist is not a known unviewable one);
    otherwise returns a playlist_result built from the inline playlist entries.

    @param item_id: fallback playlist ID when the renderer has no 'playlistId'
    @param url: URL currently being extracted
    @param data: response; used as fallback source for the title
    @param playlist: playlist renderer dict
    @param ytcfg: ytcfg passed on to the inline playlist extraction
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    web_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, web_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    if not playlist_url or playlist_url == url or is_known_unviewable:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)

4991

4992

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns: 'public' if a public badge, the header privacy or the privacy selector
              says so; otherwise the result of self._availability() for the collected flags
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(sidebar_renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def privacy_is(header_value, icon_value, default=None):
        # Header privacy takes precedence over the dropdown selector;
        # `default` is used when neither source is present
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return default

    # NOTE: the badge checks are deliberately kept outside of the header/selector fallback chain.
    # Written as `badge or x == ... if x is not None else ...`, the conditional expression binds
    # looser than `or`, so the badge was ignored whenever the header privacy was missing
    return self._availability(
        is_private=(
            self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
            or privacy_is('PRIVATE', 'PRIVACY_PRIVATE')),
        is_unlisted=(
            self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
            or privacy_is('UNLISTED', 'PRIVACY_UNLISTED', default=microformats_is_unlisted)),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` entry among the playlist sidebar items.

    @param data: response containing 'sidebar' -> 'playlistSidebarRenderer' -> 'items'
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the looked up value must have (passed to try_get)
    @returns: the matching renderer, or None if there is none
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Nothing is requested (and None is returned) unless `data` carries playlist
    metadata or a playlist header. The request itself is non-fatal.

    @param item_id: playlist ID; browsed as 'VL<item_id>'
    @param data: response of the playlist as extracted so far
    @param ytcfg: ytcfg passed on to the header generation and API request
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return

    api_headers = self.generate_api_headers(
        ytcfg=ytcfg,
        account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    return self._extract_response(
        item_id=item_id, headers=api_headers,
        query={
            'params': 'wgYCCAA=',
            'browseId': f'VL{item_id}'
        },
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')

5061

5062

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is among the 'skip' extractor arguments of YoutubeTabIE"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps

5065

5066

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying on network
    errors and on incomplete initial data.

    @param url: URL of the webpage to download
    @param item_id: ID used for the download and extraction messages
    @param fatal: passed to the retry manager, the initial data extraction and the error reporting
    @returns: tuple (webpage, data); either may still be None if extraction was aborted
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            # HTTP 403 and 429 are reported right away instead of being retried
            is_unretried_http_error = (
                isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429))
            if isinstance(cause, network_exceptions) and not is_unretried_http_error:
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_expected_keys = traverse_obj(
            data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_expected_keys:
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when `ytcfg` is available or the session is not authenticated.
    Otherwise raises an ExtractorError if `fatal` is set and 'authcheck' is not
    among the 'skip' extractor arguments, and only warns in the remaining cases.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if fatal and 'authcheck' not in skipped_steps:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

5105

5106

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Extract the initial data and ytcfg for a tab/playlist URL.

    The webpage is tried first (unless skipped via `skip_webpage`); if that
    yields no data, the URL is resolved through the API instead.

    @param url: URL being extracted
    @param item_id: ID used for requests and messages
    @param ytcfg: already known ytcfg; extracted from the webpage when empty
    @param fatal: whether a home page redirect or an API failure raises instead of warning
    @param webpage_fatal: `fatal` value used for the webpage download
    @param default_client: client passed on to the API fallback
    @returns: tuple (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)

        # Detect a redirect to the home page that was not explicitly requested
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        # No usable webpage data: check authentication caveats, then fall back to the API
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

5124

5125

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API and fetch the
    response of the endpoint it resolves to.

    A resolved 'browseEndpoint' is requested from 'browse', a 'watchEndpoint'
    from 'next'; the former is checked first.

    @param url: URL to resolve
    @param item_id: ID used for requests and messages
    @param ytcfg: ytcfg passed on to the header generation and API requests
    @param fatal: raise an ExtractorError instead of warning when nothing resolves
    @param default_client: client used for the headers and API requests
    @returns: the endpoint response, or None when nothing resolved and `fatal` is False
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

5142

5143

# Search 'params' value used by _search_results() when the caller does not pass one;
# a falsy value means no 'params' key is added to the search query
_SEARCH_PARAMS = None

5144

5145

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, following continuations page by page.

    @param query: search query string
    @param params: search 'params' value; self._SEARCH_PARAMS is used when not given
    @param default_client: client used for the ytcfg download, headers and API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the content paths above
    check_get_keys = tuple({path[0] for path in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-slot list that _extract_entries fills with the next continuation, if any
    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

5180

IE_DESC = 'YouTube Tabs'

5181

_VALID_URL = r'''(?x:

5182

https?://

5183

(?!consent\.)(?:\w+\.)?

5184

(?:

5185

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

5190

(?P<not_channel>

5191

feed/|hashtag/|

5192

(?:playlist|watch)\?.*?\blist=

5193

)|

5194

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

5199

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

5200

}

5201

IE_NAME = 'youtube:tab'

5202

5203

_TESTS = [{

5204

'note': 'playlists, multipage',

5205

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

5206

'playlist_mincount': 94,

5207

'info_dict': {

5208

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5209

'title': 'Igor Kleiner - Playlists',

5210

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

5211

'uploader': 'Igor Kleiner',

5212

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5213

'channel': 'Igor Kleiner',

5214

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5215

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

5216

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5217

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5218

'channel_follower_count': int

5219

},

5220

}, {

5221

'note': 'playlists, multipage, different order',

5222

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

5223

'playlist_mincount': 94,

5224

'info_dict': {

5225

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5226

'title': 'Igor Kleiner - Playlists',

5227

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

5228

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5229

'uploader': 'Igor Kleiner',

5230

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5231

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

5232

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5233

'channel': 'Igor Kleiner',

5234

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5235

'channel_follower_count': int

5236

},

5237

}, {

5238

'note': 'playlists, series',

5239

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

5240

'playlist_mincount': 5,

5241

'info_dict': {

5242

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5243

'title': '3Blue1Brown - Playlists',

5244

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5245

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5246

'uploader': '3Blue1Brown',

5247

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5248

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5249

'channel': '3Blue1Brown',

5250

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5251

'tags': ['Mathematics'],

5252

'channel_follower_count': int

5253

},

5254

}, {

5255

'note': 'playlists, singlepage',

5256

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

5257

'playlist_mincount': 4,

5258

'info_dict': {

5259

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5260

'title': 'ThirstForScience - Playlists',

5261

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

5262

'uploader': 'ThirstForScience',

5263

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5264

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5265

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5266

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5267

'tags': 'count:13',

5268

'channel': 'ThirstForScience',

5269

'channel_follower_count': int

5270

}

5271

}, {

5272

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

5273

'only_matching': True,

5274

}, {

5275

'note': 'basic, single video playlist',

5276

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5277

'info_dict': {

5278

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5279

'uploader': 'Sergey M.',

5280

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5281

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

5286

'channel': 'Sergey M.',

5287

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5288

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5289

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5290

'availability': 'public',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

5295

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5296

'info_dict': {

5297

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5298

'uploader': 'Sergey M.',

5299

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5300

'title': 'youtube-dl empty playlist',

5301

'tags': [],

5302

'channel': 'Sergey M.',

5303

'description': '',

5304

'modified_date': '20160902',

5305

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5306

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5307

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5308

'availability': 'public',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

5314

'info_dict': {

5315

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5316

'title': 'lex will - Home',

5317

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5318

'uploader': 'lex will',

5319

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5320

'channel': 'lex will',

5321

'tags': ['bible', 'history', 'prophesy'],

5322

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5323

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5324

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5325

'channel_follower_count': int

5326

},

5327

'playlist_mincount': 2,

5328

}, {

5329

'note': 'Videos tab',

5330

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

5331

'info_dict': {

5332

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5333

'title': 'lex will - Videos',

5334

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5335

'uploader': 'lex will',

5336

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5337

'tags': ['bible', 'history', 'prophesy'],

5338

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5339

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5340

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5341

'channel': 'lex will',

5342

'channel_follower_count': int

5343

},

5344

'playlist_mincount': 975,

5345

}, {

5346

'note': 'Videos tab, sorted by popular',

5347

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

5348

'info_dict': {

5349

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5350

'title': 'lex will - Videos',

5351

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5352

'uploader': 'lex will',

5353

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5354

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5355

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5356

'channel': 'lex will',

5357

'tags': ['bible', 'history', 'prophesy'],

5358

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5359

'channel_follower_count': int

5360

},

5361

'playlist_mincount': 199,

5362

}, {

5363

'note': 'Playlists tab',

5364

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

5365

'info_dict': {

5366

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5367

'title': 'lex will - Playlists',

5368

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5369

'uploader': 'lex will',

5370

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5371

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5372

'channel': 'lex will',

5373

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5374

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5375

'tags': ['bible', 'history', 'prophesy'],

5376

'channel_follower_count': int

5377

},

5378

'playlist_mincount': 17,

5379

}, {

5380

'note': 'Community tab',

5381

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

5382

'info_dict': {

5383

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5384

'title': 'lex will - Community',

5385

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5386

'uploader': 'lex will',

5387

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5388

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5389

'channel': 'lex will',

5390

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5391

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5392

'tags': ['bible', 'history', 'prophesy'],

5393

'channel_follower_count': int

5394

},

5395

'playlist_mincount': 18,

5396

}, {

5397

'note': 'Channels tab',

5398

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

5399

'info_dict': {

5400

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5401

'title': 'lex will - Channels',

5402

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5403

'uploader': 'lex will',

5404

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5405

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5406

'channel': 'lex will',

5407

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5408

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5409

'tags': ['bible', 'history', 'prophesy'],

5410

'channel_follower_count': int

5411

},

5412

'playlist_mincount': 12,

5413

}, {

5414

'note': 'Search tab',

5415

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

5416

'playlist_mincount': 40,

5417

'info_dict': {

5418

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5419

'title': '3Blue1Brown - Search - linear algebra',

5420

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5421

'uploader': '3Blue1Brown',

5422

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5423

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5424

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5425

'tags': ['Mathematics'],

5426

'channel': '3Blue1Brown',

5427

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5428

'channel_follower_count': int

5429

},

5430

}, {

5431

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5432

'only_matching': True,

5433

}, {

5434

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5435

'only_matching': True,

5436

}, {

5437

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5438

'only_matching': True,

5439

}, {

5440

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

5441

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5442

'info_dict': {

5443

'title': '29C3: Not my department',

5444

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5445

'uploader': 'Christiaan008',

5446

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5447

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

5448

'tags': [],

5449

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5450

'view_count': int,

5451

'modified_date': '20150605',

5452

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5453

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5454

'channel': 'Christiaan008',

5455

'availability': 'public',

5456

},

5457

'playlist_count': 96,

5458

}, {

5459

'note': 'Large playlist',

5460

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

5461

'info_dict': {

5462

'title': 'Uploads from Cauchemar',

5463

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

5464

'uploader': 'Cauchemar',

5465

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5466

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

5467

'tags': [],

5468

'modified_date': r're:\d{8}',

5469

'channel': 'Cauchemar',

5470

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

5471

'view_count': int,

5472

'description': '',

5473

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5474

'availability': 'public',

5475

},

5476

'playlist_mincount': 1123,

5477

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5478

}, {

5479

'note': 'even larger playlist, 8832 videos',

5480

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

5481

'only_matching': True,

5482

}, {

5483

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

5484

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

5485

'info_dict': {

5486

'title': 'Uploads from Interstellar Movie',

5487

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

5488

'uploader': 'Interstellar Movie',

5489

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5490

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

5491

'tags': [],

5492

'view_count': int,

5493

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5494

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

5495

'channel': 'Interstellar Movie',

5496

'description': '',

5497

'modified_date': r're:\d{8}',

5498

'availability': 'public',

5499

},

5500

'playlist_mincount': 21,

5501

}, {

5502

'note': 'Playlist with "show unavailable videos" button',

5503

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

5504

'info_dict': {

5505

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

5506

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

5507

'uploader': 'Phim Siêu Nhân Nhật Bản',

5508

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5509

'view_count': int,

5510

'channel': 'Phim Siêu Nhân Nhật Bản',

5511

'tags': [],

5512

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5513

'description': '',

5514

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5515

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5516

'modified_date': r're:\d{8}',

5517

'availability': 'public',

5518

},

5519

'playlist_mincount': 200,

5520

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5521

}, {

5522

'note': 'Playlist with unavailable videos in page 7',

5523

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

5524

'info_dict': {

5525

'title': 'Uploads from BlankTV',

5526

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

5527

'uploader': 'BlankTV',

5528

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5529

'channel': 'BlankTV',

5530

'channel_url': 'https://www.youtube.com/c/blanktv',

5531

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5532

'view_count': int,

5533

'tags': [],

5534

'uploader_url': 'https://www.youtube.com/c/blanktv',

5535

'modified_date': r're:\d{8}',

5536

'description': '',

5537

'availability': 'public',

5538

},

5539

'playlist_mincount': 1000,

5540

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5541

}, {

5542

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

5543

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5544

'info_dict': {

5545

'title': 'Data Analysis with Dr Mike Pound',

5546

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5547

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5548

'uploader': 'Computerphile',

5549

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

5550

'uploader_url': 'https://www.youtube.com/user/Computerphile',

5551

'tags': [],

5552

'view_count': int,

5553

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5554

'channel_url': 'https://www.youtube.com/user/Computerphile',

5555

'channel': 'Computerphile',

5556

'availability': 'public',

5557

'modified_date': '20190712',

5558

},

5559

'playlist_mincount': 11,

5560

}, {

5561

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5562

'only_matching': True,

5563

}, {

5564

'note': 'Playlist URL that does not actually serve a playlist',

5565

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

5570

'uploader': 'STREEM',

5571

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

5572

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

5573

'upload_date': '20150526',

5574

'license': 'Standard YouTube License',

5575

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

5576

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5583

},

5584

'skip': 'This video is not available.',

5585

'add_ie': [YoutubeIE.ie_key()],

5586

}, {

5587

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5588

'only_matching': True,

5589

}, {

5590

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5591

'only_matching': True,

5592

}, {

5593

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5594

'info_dict': {

5595

'id': 'Wq15eF5vCbI', # This will keep changing

5596

'ext': 'mp4',

5597

'title': str,

5598

'uploader': 'Sky News',

5599

'uploader_id': 'skynews',

5600

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5601

'upload_date': r're:\d{8}',

5602

'description': str,

5603

'categories': ['News & Politics'],

5604

'tags': list,

5605

'like_count': int,

5606

'release_timestamp': int,

5607

'channel': 'Sky News',

5608

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5609

'age_limit': 0,

5610

'view_count': int,

5611

'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',

5612

'playable_in_embed': True,

5613

'release_date': r're:\d+',

5614

'availability': 'public',

5615

'live_status': 'is_live',

5616

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5617

'channel_follower_count': int,

5618

'concurrent_view_count': int,

5619

},

5620

'params': {

5621

'skip_download': True,

5622

},

5623

'expected_warnings': ['Ignoring subtitle tracks found in '],

5624

}, {

5625

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5630

'uploader': 'The Young Turks',

5631

'uploader_id': 'TheYoungTurks',

5632

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5633

'upload_date': '20150715',

5634

'license': 'Standard YouTube License',

5635

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5636

'categories': ['News & Politics'],

5637

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5642

},

5643

'only_matching': True,

5644

}, {

5645

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5646

'only_matching': True,

5647

}, {

5648

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5649

'only_matching': True,

5650

}, {

5651

'note': 'A channel that is not live. Should raise error',

5652

'url': 'https://www.youtube.com/user/numberphile/live',

5653

'only_matching': True,

5654

}, {

5655

'url': 'https://www.youtube.com/feed/trending',

5656

'only_matching': True,

5657

}, {

5658

'url': 'https://www.youtube.com/feed/library',

5659

'only_matching': True,

5660

}, {

5661

'url': 'https://www.youtube.com/feed/history',

5662

'only_matching': True,

5663

}, {

5664

'url': 'https://www.youtube.com/feed/subscriptions',

5665

'only_matching': True,

5666

}, {

5667

'url': 'https://www.youtube.com/feed/watch_later',

5668

'only_matching': True,

5669

}, {

5670

'note': 'Recommended - redirects to home page.',

5671

'url': 'https://www.youtube.com/feed/recommended',

5672

'only_matching': True,

5673

}, {

5674

'note': 'inline playlist with not always working continuations',

5675

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5676

'only_matching': True,

5677

}, {

5678

'url': 'https://www.youtube.com/course',

5679

'only_matching': True,

5680

}, {

5681

'url': 'https://www.youtube.com/zsecurity',

5682

'only_matching': True,

5683

}, {

5684

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5685

'only_matching': True,

5686

}, {

5687

'url': 'https://www.youtube.com/TheYoungTurks/live',

5688

'only_matching': True,

5689

}, {

5690

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 300, # not consistent but should be over 300

5697

}, {

5698

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5699

'only_matching': True,

5700

}, {

5701

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5702

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5703

'only_matching': True

5704

}, {

5705

'note': '/browse/ should redirect to /channel/',

5706

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5707

'only_matching': True

5708

}, {

5709

'note': 'VLPL, should redirect to playlist?list=PL...',

5710

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5711

'info_dict': {

5712

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5713

'uploader': 'NoCopyrightSounds',

5714

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5715

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5716

'title': 'NCS : All Releases 💿',

5717

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5718

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5719

'modified_date': r're:\d{8}',

5720

'view_count': int,

5721

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5722

'tags': [],

5723

'channel': 'NoCopyrightSounds',

5724

'availability': 'public',

5725

},

5726

'playlist_mincount': 166,

5727

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5728

}, {

5729

'note': 'Topic, should redirect to playlist?list=UU...',

5730

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5731

'info_dict': {

5732

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5733

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5734

'title': 'Uploads from Royalty Free Music - Topic',

5735

'uploader': 'Royalty Free Music - Topic',

5736

'tags': [],

5737

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5738

'channel': 'Royalty Free Music - Topic',

5739

'view_count': int,

5740

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5741

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5742

'modified_date': r're:\d{8}',

5743

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5744

'description': '',

5745

'availability': 'public',

5746

},

5747

'playlist_mincount': 101,

5748

}, {

5749

# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)

5750

# Treat as a general feed

5751

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5752

'info_dict': {

5753

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5754

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5755

'tags': [],

5756

},

5757

'playlist_mincount': 9,

5758

}, {

5759

'note': 'Youtube music Album',

5760

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5761

'info_dict': {

5762

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5763

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5768

'modified_date': r're:\d{8}',

5769

},

5770

'playlist_count': 50,

5771

}, {

5772

'note': 'unlisted single video playlist',

5773

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5774

'info_dict': {

5775

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5776

'uploader': 'colethedj',

5777

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5778

'title': 'yt-dlp unlisted playlist test',

5779

'availability': 'unlisted',

5780

'tags': [],

5781

'modified_date': '20220418',

5782

'channel': 'colethedj',

5783

'view_count': int,

5784

'description': '',

5785

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5786

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5787

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5792

'url': 'https://www.youtube.com/feed/recommended',

5793

'info_dict': {

5794

'id': 'recommended',

5795

'title': 'recommended',

5796

'tags': [],

5797

},

5798

'playlist_mincount': 50,

5799

'params': {

5800

'skip_download': True,

5801

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5802

},

5803

}, {

5804

'note': 'API Fallback: /videos tab, sorted by oldest first',

5805

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5806

'info_dict': {

5807

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5808

'title': 'Cody\'sLab - Videos',

5809

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5810

'uploader': 'Cody\'sLab',

5811

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5812

'channel': 'Cody\'sLab',

5813

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5814

'tags': [],

5815

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5816

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5817

'channel_follower_count': int

5818

},

5819

'playlist_mincount': 650,

5820

'params': {

5821

'skip_download': True,

5822

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5823

},

5824

'skip': 'Query for sorting no longer works',

5825

}, {

5826

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5827

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5828

'info_dict': {

5829

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5830

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5831

'title': 'Uploads from Royalty Free Music - Topic',

5832

'uploader': 'Royalty Free Music - Topic',

5833

'modified_date': r're:\d{8}',

5834

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5835

'description': '',

5836

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5837

'tags': [],

5838

'channel': 'Royalty Free Music - Topic',

5839

'view_count': int,

5840

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5841

'availability': 'public',

5842

},

5843

'playlist_mincount': 101,

5844

'params': {

5845

'skip_download': True,

5846

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5847

},

5848

}, {

5849

'note': 'non-standard redirect to regional channel',

5850

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5851

'only_matching': True

5852

}, {

5853

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5854

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5855

'info_dict': {

5856

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5857

'modified_date': '20220407',

5858

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5859

'tags': [],

5860

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5861

'uploader': 'pukkandan',

5862

'availability': 'unlisted',

5863

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5864

'channel': 'pukkandan',

5865

'description': 'Test for collaborative playlist',

5866

'title': 'yt-dlp test - collaborative playlist',

5867

'view_count': int,

5868

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5869

},

5870

'playlist_mincount': 2

5871

}, {

5872

'note': 'translated tab name',

5873

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',

5874

'info_dict': {

5875

'id': 'UCiu-3thuViMebBjw_5nWYrA',

5876

'tags': [],

5877

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5878

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5879

'description': 'test description',

5880

'title': 'cole-dlp-test-acc - 再生リスト',

5881

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5882

'uploader': 'cole-dlp-test-acc',

5883

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5884

'channel': 'cole-dlp-test-acc',

5885

},

5886

'playlist_mincount': 1,

5887

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5888

'expected_warnings': ['Preferring "ja"'],

5889

}, {

5890

# XXX: this should really check flat playlist entries, but the test suite doesn't support that

5891

'note': 'preferred lang set with playlist with translated video titles',

5892

'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5893

'info_dict': {

5894

'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5895

'tags': [],

5896

'view_count': int,

5897

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5898

'uploader': 'cole-dlp-test-acc',

5899

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5900

'channel': 'cole-dlp-test-acc',

5901

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5902

'description': 'test',

5903

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5904

'title': 'dlp test playlist',

5905

'availability': 'public',

5906

},

5907

'playlist_mincount': 1,

5908

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5909

'expected_warnings': ['Preferring "ja"'],

5910

}, {

5911

# shorts audio pivot for 2GtVksBMYFM.

5912

'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',

5913

'info_dict': {

5914

'id': 'sfv_audio_pivot',

5915

'title': 'sfv_audio_pivot',

5916

'tags': [],

5917

},

5918

'playlist_mincount': 50,

5919

5920

}, {

5921

# Channel with a real live tab (not to be mistaken with streams tab)

5922

# Do not treat like it should redirect to live stream

5923

'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',

5924

'info_dict': {

5925

'id': 'UCEH7P7kyJIkS_gJf93VYbmg',

5926

'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',

5927

'tags': [],

5928

},

5929

'playlist_mincount': 20,

5930

}, {

5931

# Tab name is not the same as tab id

5932

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',

5933

'info_dict': {

5934

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

5935

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',

5936

'tags': [],

5937

},

5938

'playlist_mincount': 8,

5939

}, {

5940

# Home tab id is literally home. Not to get mistaken with featured

5941

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',

5942

'info_dict': {

5943

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

5944

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',

5945

'tags': [],

5946

},

5947

'playlist_mincount': 8,

5948

}, {

5949

# Should get three playlists for videos, shorts and streams tabs

5950

'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5951

'info_dict': {

5952

'id': 'UCK9V2B22uJYu3N7eR_BT9QA',

5953

'title': 'Polka Ch. 尾丸ポルカ',

5954

'channel_follower_count': int,

5955

'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',

5956

'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5957

'uploader': 'Polka Ch. 尾丸ポルカ',

5958

'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9',

5959

'channel': 'Polka Ch. 尾丸ポルカ',

5960

'tags': 'count:35',

5961

'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5962

'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA',

},

'playlist_count': 3,

}, {

# Shorts tab with channel with handle

5967

'url': 'https://www.youtube.com/@NotJustBikes/shorts',

5968

'info_dict': {

5969

'id': 'UC0intLFzLaudFG-xAvUEO-A',

5970

'title': 'Not Just Bikes - Shorts',

5971

'tags': 'count:12',

5972

'uploader': 'Not Just Bikes',

5973

'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',

5974

'description': 'md5:7513148b1f02b924783157d84c4ea555',

5975

'channel_follower_count': int,

5976

'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',

5977

'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',

5978

'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',

5979

'channel': 'Not Just Bikes',

5980

},

5981

'playlist_mincount': 10,

5982

}, {

5983

# Streams tab

5984

'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',

5985

'info_dict': {

5986

'id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5987

'title': '中村悠一 - Live',

5988

'tags': 'count:7',

5989

'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5990

'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',

5991

'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5992

'channel': '中村悠一',

5993

'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',

5994

'channel_follower_count': int,

5995

'uploader': '中村悠一',

5996

'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',

5997

},

5998

'playlist_mincount': 60,

5999

}, {

6000

# Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.

6001

# See test_youtube_lists

6002

'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',

6003

'only_matching': True,

6004

}, {

6005

# No uploads and no UCID given. Should fail with no uploads error

6006

# See test_youtube_lists

6007

'url': 'https://www.youtube.com/news',

6008

'only_matching': True

6009

}, {

6010

# No videos tab but has a shorts tab

6011

'url': 'https://www.youtube.com/c/TKFShorts',

6012

'info_dict': {

6013

'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

6014

'title': 'Shorts Break - Shorts',

6015

'tags': 'count:32',

6016

'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

6017

'channel': 'Shorts Break',

6018

'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',

6019

'uploader': 'Shorts Break',

6020

'channel_follower_count': int,

6021

'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

6022

'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',

6023

'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',

6024

},

6025

'playlist_mincount': 30,

6026

}, {

6027

# Trending Now Tab. tab id is empty

6028

'url': 'https://www.youtube.com/feed/trending',

6029

'info_dict': {

6030

'id': 'trending',

6031

'title': 'trending - Now',

6032

'tags': [],

6033

},

6034

'playlist_mincount': 30,

6035

}, {

6036

# Trending Gaming Tab. tab id is empty

6037

'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',

6038

'info_dict': {

6039

'id': 'trending',

6040

'title': 'trending - Gaming',

6041

'tags': [],

6042

},

6043

'playlist_mincount': 30,

6044

}, {

6045

# Shorts url result in shorts tab

6046

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',

6047

'info_dict': {

6048

'id': 'UCiu-3thuViMebBjw_5nWYrA',

6049

'title': 'cole-dlp-test-acc - Shorts',

6050

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

6051

'channel': 'cole-dlp-test-acc',

6052

'description': 'test description',

6053

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6054

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

6055

'tags': [],

6056

'uploader': 'cole-dlp-test-acc',

6057

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'Youtube',

'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',

6065

'id': 'sSM9J5YH_60',

6066

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6067

'title': 'SHORT short',

6068

'channel': 'cole-dlp-test-acc',

6069

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

'view_count': int,

'thumbnails': list,

}

}],

'params': {'extract_flat': True},

6075

}, {

6076

# Live video status should be extracted

6077

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',

6078

'info_dict': {

6079

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

6080

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live

'tags': []

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'Youtube',

'url': 'startswith:https://www.youtube.com/watch?v=',

6088

'id': str,

6089

'title': str,

6090

'live_status': 'is_live',

6091

'channel_id': str,

6092

'channel_url': str,

6093

'concurrent_view_count': int,

'channel': str,

}

}],

'params': {'extract_flat': True, 'playlist_items': '1'},

6098

'playlist_mincount': 1

6099

}, {

6100

# Channel renderer metadata. Contains number of videos on the channel

6101

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',

6102

'info_dict': {

6103

'id': 'UCiu-3thuViMebBjw_5nWYrA',

6104

'title': 'cole-dlp-test-acc - Channels',

6105

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

6106

'channel': 'cole-dlp-test-acc',

6107

'description': 'test description',

6108

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6109

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

6110

'tags': [],

6111

'uploader': 'cole-dlp-test-acc',

6112

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'YoutubeTab',

6119

'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6120

'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6121

'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6122

'title': 'PewDiePie',

6123

'channel': 'PewDiePie',

6124

'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6125

'thumbnails': list,

6126

'channel_follower_count': int,

6127

'playlist_count': int

6128

}

6129

}],

6130

'params': {'extract_flat': True},

6131

}, {

6132

'url': 'https://www.youtube.com/@3blue1brown/about',

6133

'info_dict': {

6134

'id': 'UCYO_jab_esuFRV4b17AJtAw',

6135

'tags': ['Mathematics'],

6136

'title': '3Blue1Brown - About',

6137

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

6138

'channel_follower_count': int,

6139

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

6140

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

6141

'channel': '3Blue1Brown',

6142

'uploader': '3Blue1Brown',

6143

'view_count': int,

6144

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

6145

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

},

'playlist_count': 0,

}]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs that YoutubeIE accepts are always left to it; anything else is
    decided by the inherited `suitable` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

6153

6154

# Splits a URL into three named pieces on top of whatever _VALID_URL captures:
#   pre  - the portion matched by _VALID_URL itself
#   tab  - an optional single path segment ("/" followed by characters other
#          than "?", "#" and "/"); the conditional group only attempts it when
#          the `not_channel` group did not participate in the match
#   post - everything that remains up to the end of the string
# NOTE(review): `not_channel` is expected to be a named group inside
# _VALID_URL, which is defined outside this view - confirm.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')

6155

6156

def _get_url_mobj(self, url):

6157

mobj = self._URL_RE.match(url).groupdict()

6158

mobj.update((k, '') for k, v in mobj.items() if v is None)

6159

return mobj

6160

6161

def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a ``(tab_id, tab_name)`` pair for a tab renderer dict.

    ``tab_name`` is the lower-cased ``title`` of the tab (empty string when
    there is none). ``tab_id`` is taken, in order of preference, from the
    tab segment of the tab's endpoint URL (resolved against *base_url*),
    from the ``tabIdentifier`` field, or finally from the tab name.
    """
    tab_name = (tab.get('title') or '').lower()
    endpoint_url = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_url)

    tab_id = None
    if tab_url:
        # The `tab` group keeps its leading slash; strip it
        tab_id = self._get_url_mobj(tab_url)['tab'][1:]
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if tab_id:
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')

    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(tab_name, tab_name), tab_name

6182

6183

def _has_tab(self, tabs, tab_id):

6184

return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)

6185

6186

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab-style URL to a playlist result, a redirect or a single video.

    The host of *url* is first rewritten to ``www.youtube.com``. Depending on
    the URL and on the page data this then returns one of:

    * a ``url_result`` pointing at another YoutubeTabIE / YoutubeIE URL
      (music VL/MP/browse ids, ``watch?list=`` URLs, regional redirects,
      URLs where only the video should be downloaded),
    * the entry built from the page's tabs, optionally wrapped in a
      playlist together with the channel's streams/shorts tabs,
    * the inline playlist of a watch page.

    Raises ExtractorError when the URL cannot be resolved (no video id, no
    uploads, missing tab, unrecognized page) and UserNotLive when a ``live``
    tab was requested but a different tab is the selected one.
    """
    item_id = self._match_id(url)
    parsed_url = urllib.parse.urlparse(url)
    url = urllib.parse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    url_parts = self._get_url_mobj(url)
    pre, tab, post = url_parts['pre'], url_parts['tab'], url_parts['post']
    is_channel = not url_parts['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            music_playlist_id = item_id[2:]
            return self.url_result(
                f'https://music.youtube.com/playlist?list={music_playlist_id}',
                YoutubeTabIE, music_playlist_id)
        if id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            album_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if not album_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(album_url, YoutubeTabIE)
        if url_parts['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    original_tab_id = tab[1:]
    display_id = f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        url = f'{pre}/videos{post}'

    # Handle both video/playlist URLs
    query = parse_qs(url)
    video_id = traverse_obj(query, ('v', 0))
    playlist_id = traverse_obj(query, ('list', 0))
    if not video_id and url_parts['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_path = (
        'onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
        'commandMetadata', 'webCommandMetadata', 'url')
    redirect_url = traverse_obj(data, redirect_path, get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    tabs = self._extract_tab_renderers(data)
    extra_tabs = []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        # NB: Name may be translated
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            for extra_tab in ('streams', 'shorts'):
                if self._has_tab(tabs, extra_tab):
                    extra_tabs.append(f'{pre}/{extra_tab}{post}')
            # XXX: Members-only tab should also be extracted

            if selected_tab_id != 'videos':
                if extra_tabs:
                    # When there are shorts/live tabs but not videos tab
                    url, data = f'{pre}{post}', None
                else:
                    # Channel does not have streams, shorts or videos tabs
                    if item_id[:2] != 'UC':
                        raise ExtractorError('This channel has no uploads', expected=True)

                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    uploads_id = f'UU{item_id[2:]}'
                    uploads_playlist_url = f'https://www.youtube.com/playlist?list={uploads_id}'
                    try:
                        data, ytcfg = self._extract_data(
                            uploads_playlist_url, uploads_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        raise ExtractorError('This channel has no uploads', expected=True)
                    # Only reached when the uploads playlist could be fetched
                    item_id, url = uploads_id, uploads_playlist_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {uploads_id} instead')

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            if selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    tabs = self._extract_tab_renderers(data)
    entries = []
    if tabs:
        tab_result = self._extract_from_tabs(item_id, ytcfg, data, tabs)
        tab_result.update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
        entries.append(tab_result)

    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(extra_url, YoutubeTabIE) for extra_url in extra_tabs)
    else:
        # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(self._real_extract(extra_url) for extra_url in extra_tabs)

    if len(entries) == 1:
        return entries[0]
    if entries:
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_hint = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_hint = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_hint}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    inline_playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if inline_playlist:
        return self._extract_from_playlist(item_id, url, data, inline_playlist, ytcfg)

    # Prefer the video id reported by the page; otherwise keep the one from the query string
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')

6335

6336

6337

class YoutubePlaylistIE(InfoExtractor):

6338

IE_DESC = 'YouTube playlists'

6339

_VALID_URL = r'''(?x)(?:

(?:https?://)?

(?:\w+\.)?

(?:

(?:

youtube(?:kids)?\.com|

%(invidious)s

)

/.*?\?.*?\blist=

)?

(?P<id>%(playlist_id)s)

6350

)''' % {

6351

'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,

6352

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

6353

}

6354

IE_NAME = 'youtube:playlist'

6355

_TESTS = [{

6356

'note': 'issue #673',

6357

'url': 'PLBB231211A4F62143',

6358

'info_dict': {

6359

'title': '[OLD]Team Fortress 2 (Class-based LP)',

6360

'id': 'PLBB231211A4F62143',

6361

'uploader': 'Wickman',

6362

'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',

6363

'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',

6364

'view_count': int,

6365

'uploader_url': 'https://www.youtube.com/c/WickmanVT',

6366

'modified_date': r're:\d{8}',

6367

'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',

6368

'channel': 'Wickman',

6369

'tags': [],

6370

'channel_url': 'https://www.youtube.com/c/WickmanVT',

6371

'availability': 'public',

6372

},

6373

'playlist_mincount': 29,

6374

}, {

6375

'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',

6376

'info_dict': {

6377

'title': 'YDL_safe_search',

6378

'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',

6379

},

6380

'playlist_count': 2,

6381

'skip': 'This playlist is private',

6382

}, {

6383

'note': 'embedded',

6384

'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',

'playlist_count': 4,

'info_dict': {

'title': 'JODA15',

'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',

6389

'uploader': 'milan',

6390

'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',

6391

'description': '',

6392

'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',

6393

'tags': [],

6394

'modified_date': '20140919',

6395

'view_count': int,

6396

'channel': 'milan',

6397

'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',

6398

'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',

6399

'availability': 'public',

6400

},

6401

'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],

6402

}, {

6403

'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',

6404

'playlist_mincount': 455,

6405

'info_dict': {

6406

'title': '2018 Chinese New Singles (11/6 updated)',

6407

'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',

6408

'uploader': 'LBK',

6409

'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',

6410

'description': 'md5:da521864744d60a198e3a88af4db0d9d',

6411

'channel': 'LBK',

6412

'view_count': int,

6413

'channel_url': 'https://www.youtube.com/c/愛低音的國王',

6414

'tags': [],

6415

'uploader_url': 'https://www.youtube.com/c/愛低音的國王',

6416

'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',

6417

'modified_date': r're:\d{8}',

6418

'availability': 'public',

6419

},

6420

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

6421

}, {

6422

'url': 'TLGGrESM50VT6acwMjAyMjAxNw',

6423

'only_matching': True,

6424

}, {

6425

# music album playlist

6426

'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',

6427

'only_matching': True,

}]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Declines any URL that YoutubeTabIE accepts, and any URL whose query
    string carries a non-empty ``v`` value; otherwise defers to the parent
    class' check.
    """
    if YoutubeTabIE.suitable(url):
        return False
    # NOTE(review): the import is kept function-local exactly as in the
    # original; presumably this method must stay self-contained - confirm
    # before hoisting it to module level.
    from ..utils import parse_qs
    video_ids = parse_qs(url).get('v', [None])
    return False if video_ids[0] else super().suitable(url)

6439

6440

def _real_extract(self, url):
    """Redirect to the canonical ``/playlist`` URL, handled by YoutubeTabIE.

    The original query string is carried over; when it is empty, a ``list``
    parameter holding the matched playlist id is used instead. URLs that
    ``is_music_url`` recognises get that fact smuggled into the result URL.
    """
    playlist_id = self._match_id(url)
    from_music = YoutubeBaseInfoExtractor.is_music_url(url)
    query = parse_qs(url) or {'list': playlist_id}
    playlist_url = update_url_query('https://www.youtube.com/playlist', query)
    if from_music:
        playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
    return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6449

6450

6451

class YoutubeYtBeIE(InfoExtractor):
    # Matches youtu.be short links that carry a ``list`` query parameter and
    # forwards them to YoutubeTabIE as an ordinary watch URL.
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rebuild the short link as a watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6500

6501

6502

class YoutubeLivestreamEmbedIE(InfoExtractor):
    # Turns ``/embed/live_stream?channel=<id>`` URLs into the channel's
    # ``/live`` page, which YoutubeTabIE then processes.
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Point the embed at ``/channel/<id>/live`` and delegate to YoutubeTabIE."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

6515

6516

6517

class YoutubeYtUserIE(InfoExtractor):
    # ``ytuser:<name>`` shorthand for https://www.youtube.com/user/<name>.
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the ``ytuser:`` shorthand into a /user/ URL for YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)

6529

6530

6531

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ``:ytfav`` keyword (and its spelling variants) -> the ``LL`` playlist.
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``list=LL`` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

6548

6549

6550

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    # ``:ytnotif`` keyword: pages through the notification menu endpoint and
    # emits one ``url`` result per notification.
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    IE_NAME = 'youtube:notif'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield a result for every usable notification found in *response*.

        ``continuation_list[0]`` is reset to None and then set to the last
        non-empty ``continuationItemRenderer`` seen among the items, so the
        caller can tell whether another page should be requested.
        """
        first_page_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0,
            'multiPageMenuNotificationSectionRenderer', 'items')
        next_page_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, first_page_path, next_page_path, expected_type=list) or []

        continuation_list[0] = None
        for item in items:
            result = self._extract_notification_renderer(item.get('notificationRenderer'))
            if result:
                yield result
            next_renderer = item.get('continuationItemRenderer')
            if next_renderer:
                continuation_list[0] = next_renderer

    def _extract_notification_renderer(self, notification):
        """Build a ``url`` result dict from one ``notificationRenderer``.

        Notifications with a watch endpoint point at the video; otherwise a
        community post URL is built from the browse endpoint. Returns None
        when neither a video id nor a channel id / post id pair is found.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            canonical_url = traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(r'/post/(.+)', canonical_url, 'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        message = self._get_text(notification, 'shortMessage')
        if message:
            message = message.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', message,
            'video title', default=None)

        # The sent time is only parsed when the ``approximate_date``
        # extractor argument of YoutubeTabIE is set.
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification results page by page until no continuation remains."""
        continuation_list = [None]
        previous_response = None
        for page_num in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response
            # (None on the first request).
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(previous_response))
            previous_response = self._extract_response(
                item_id=f'page {page_num}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(previous_response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return a playlist named ``notifications`` with all notification entries."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)

6639

6640

6641

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # ``ytsearch`` search key, restricted to videos via _SEARCH_PARAMS.
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # ``ytsearchdate`` search key: same as the plain search but with
    # date-sorted search params.
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles ``/results`` and ``/search`` URLs; the ``sp`` query parameter,
    # when present, is forwarded as the search params.
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'playlist_count': int,  # XXX: should have a way of saying > 1
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list
            }
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named by ``search_query``/``q`` and return it as a playlist.

        The query text doubles as both the playlist id and its title.
        """
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

6734

6735

6736

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles music.youtube.com search URLs. A result section can be chosen
    # either with the ``sp`` query parameter or with a URL fragment naming
    # one of the _SECTIONS keys.
    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name -> search params value sent for that section.
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Search YouTube Music and return the results as a playlist.

        The playlist id/title is the query, suffixed with `` - <section>``
        when a section applies. With an ``sp`` parameter the section label is
        the matching _SECTIONS key, or the raw ``sp`` value if none matches.
        Without ``sp`` the lower-cased URL fragment is looked up in
        _SECTIONS; an unknown fragment means no section and no params.
        """
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        params = qs.get('sp', (None,))[0]

        if params:
            section = params
            for name, token in self._SECTIONS.items():
                if token == params:
                    section = name
                    break
        else:
            url_parts = url.split('#')
            fragment = url_parts[1] if len(url_parts) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

6787

6788

6789

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    Each subclass has to override _FEED_NAME; it determines both IE_NAME
    (``youtube:<_FEED_NAME>``) and the ``/feed/<_FEED_NAME>`` URL that is
    passed on to YoutubeTabIE.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # Reuse the login check implemented on YoutubeBaseInfoExtractor.
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        return f'youtube:{cls._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with this feed's ``/feed/`` URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

6807

6808

6809

class YoutubeWatchLaterIE(InfoExtractor):
    # ``:ytwatchlater`` keyword -> the ``WL`` playlist.
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

6821

6822

6823

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # ``:ytrec`` keyword or the bare youtube.com front page; unlike the base
    # feed class this one sets _LOGIN_REQUIRED to False.
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # ``:ytsubs`` keyword (and variants) -> the ``subscriptions`` feed.
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # ``:ythis`` / ``:ythistory`` keyword -> the ``history`` feed.
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeStoriesIE(InfoExtractor):
    # ``ytstories:UC<id>`` shorthand; the part after ``UC`` is prefixed with
    # ``RLTD`` to form the playlist id handed to YoutubeTabIE.
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``RLTD`` playlist, flagged as a story."""
        playlist_id = f'RLTD{self._match_id(url)}'
        story_url = smuggle_url(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)

6877

6878

6879

class YoutubeShortsAudioPivotIE(InfoExtractor):
    # ``/source/<video id>/shorts`` URLs -> the ``sfv_audio_pivot`` feed of
    # that video, processed by YoutubeTabIE.
    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the ``bp`` browse params of the sfv_audio_pivot feed for *video_id*.

        The encoded video id is substituted three times into a fixed byte
        template; the result is base64-encoded and then URL-quoted.
        """
        raw_id = video_id.encode()
        payload = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (raw_id, raw_id, raw_id)
        encoded = base64.b64encode(payload).decode()
        return urllib.parse.quote(encoded)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the audio pivot feed URL of the video."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)

class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution_link URLs that end right after a single
    # non-video query parameter (or after a bare ``watch?``), and answers
    # with a hint instead of attempting an extraction.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL.

        The example commands in the message name ``yt-dlp``, the program
        this module belongs to, rather than ``youtube-dl``.
        """
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    # Resolves ``/clip/<id>`` pages to the underlying video and reports the
    # clip boundaries as section_start/section_end (in seconds).
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the clip's base video.

        Raises ExtractorError when the page data contains no video id, or
        when the clip's start/end times cannot be found or parsed.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # clip_data is None when the loopCommand is absent; subscripting it
        # directly would surface as an opaque TypeError, so the times are
        # read defensively and a clear extractor error is raised instead.
        section_start = float_or_none(traverse_obj(clip_data, 'startTimeMs'), 1000)
        section_end = float_or_none(traverse_obj(clip_data, 'endTimeMs'), 1000)
        if section_start is None or section_end is None:
            raise ExtractorError('Unable to find clip start/end time')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': section_start,
            'section_end': section_end,
        }

class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    # Follows consent.youtube.com redirect pages to the URL given in their
    # ``continue`` query parameter.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:consent'
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'http://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """Return a url result for the last ``continue`` value of the consent URL.

        Raises an expected ExtractorError when that value is missing or is
        rejected by ``url_or_none``.
        """
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)

7053

7054

7055

class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose ``v`` value is only 1-10 characters long and
    # reports them as truncated instead of attempting an extraction.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)