jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import collections
	4	import copy
	5	import datetime
	6	import enum
	7	import hashlib
	8	import itertools
	9	import json
	10	import math
	11	import os.path
	12	import random
	13	import re
	14	import sys
	15	import threading
	16	import time
	17	import traceback
	18	import urllib.error
	19	import urllib.parse
	20
	21	from .common import InfoExtractor, SearchInfoExtractor
	22	from .openload import PhantomJSwrapper
	23	from ..compat import functools
	24	from ..jsinterp import JSInterpreter
	25	from ..utils import (
	26	NO_DEFAULT,
	27	ExtractorError,
	28	LazyList,
	29	UserNotLive,
	30	bug_reports_message,
	31	classproperty,
	32	clean_html,
	33	datetime_from_str,
	34	dict_get,
	35	filter_dict,
	36	float_or_none,
	37	format_field,
	38	get_first,
	39	int_or_none,
	40	is_html,
	41	join_nonempty,
	42	js_to_json,
	43	mimetype2ext,
	44	network_exceptions,
	45	orderedSet,
	46	parse_codecs,
	47	parse_count,
	48	parse_duration,
	49	parse_iso8601,
	50	parse_qs,
	51	qualities,
	52	remove_start,
	53	smuggle_url,
	54	str_or_none,
	55	str_to_int,
	56	strftime_or_none,
	57	traverse_obj,
	58	try_get,
	59	unescapeHTML,
	60	unified_strdate,
	61	unified_timestamp,
	62	unsmuggle_url,
	63	update_url_query,
	64	url_or_none,
	65	urljoin,
	66	variadic,
	67	)
	68
	69
	70	STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
	71	# any clients starting with _ cannot be explicitly requested by the user
	72	INNERTUBE_CLIENTS = {
	73	'web': {
	74	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	75	'INNERTUBE_CONTEXT': {
	76	'client': {
	77	'clientName': 'WEB',
	78	'clientVersion': '2.20220801.00.00',
	79	}
	80	},
	81	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	82	},
	83	'web_embedded': {
	84	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	85	'INNERTUBE_CONTEXT': {
	86	'client': {
	87	'clientName': 'WEB_EMBEDDED_PLAYER',
	88	'clientVersion': '1.20220731.00.00',
	89	},
	90	},
	91	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	92	},
	93	'web_music': {
	94	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	95	'INNERTUBE_HOST': 'music.youtube.com',
	96	'INNERTUBE_CONTEXT': {
	97	'client': {
	98	'clientName': 'WEB_REMIX',
	99	'clientVersion': '1.20220727.01.00',
	100	}
	101	},
	102	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	103	},
	104	'web_creator': {
	105	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	106	'INNERTUBE_CONTEXT': {
	107	'client': {
	108	'clientName': 'WEB_CREATOR',
	109	'clientVersion': '1.20220726.00.00',
	110	}
	111	},
	112	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	113	},
	114	'android': {
	115	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	116	'INNERTUBE_CONTEXT': {
	117	'client': {
	118	'clientName': 'ANDROID',
	119	'clientVersion': '17.31.35',
	120	'androidSdkVersion': 30,
	121	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	122	}
	123	},
	124	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	125	'REQUIRE_JS_PLAYER': False
	126	},
	127	'android_embedded': {
	128	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	129	'INNERTUBE_CONTEXT': {
	130	'client': {
	131	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	132	'clientVersion': '17.31.35',
	133	'androidSdkVersion': 30,
	134	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	135	},
	136	},
	137	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	138	'REQUIRE_JS_PLAYER': False
	139	},
	140	'android_music': {
	141	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	142	'INNERTUBE_CONTEXT': {
	143	'client': {
	144	'clientName': 'ANDROID_MUSIC',
	145	'clientVersion': '5.16.51',
	146	'androidSdkVersion': 30,
	147	'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
	148	}
	149	},
	150	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	151	'REQUIRE_JS_PLAYER': False
	152	},
	153	'android_creator': {
	154	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	155	'INNERTUBE_CONTEXT': {
	156	'client': {
	157	'clientName': 'ANDROID_CREATOR',
	158	'clientVersion': '22.30.100',
	159	'androidSdkVersion': 30,
	160	'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
	161	},
	162	},
	163	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	164	'REQUIRE_JS_PLAYER': False
	165	},
	166	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	167	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	168	'ios': {
	169	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	170	'INNERTUBE_CONTEXT': {
	171	'client': {
	172	'clientName': 'IOS',
	173	'clientVersion': '17.33.2',
	174	'deviceModel': 'iPhone14,3',
	175	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	176	}
	177	},
	178	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	179	'REQUIRE_JS_PLAYER': False
	180	},
	181	'ios_embedded': {
	182	'INNERTUBE_CONTEXT': {
	183	'client': {
	184	'clientName': 'IOS_MESSAGES_EXTENSION',
	185	'clientVersion': '17.33.2',
	186	'deviceModel': 'iPhone14,3',
	187	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	188	},
	189	},
	190	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	191	'REQUIRE_JS_PLAYER': False
	192	},
	193	'ios_music': {
	194	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	195	'INNERTUBE_CONTEXT': {
	196	'client': {
	197	'clientName': 'IOS_MUSIC',
	198	'clientVersion': '5.21',
	199	'deviceModel': 'iPhone14,3',
	200	'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	201	},
	202	},
	203	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	204	'REQUIRE_JS_PLAYER': False
	205	},
	206	'ios_creator': {
	207	'INNERTUBE_CONTEXT': {
	208	'client': {
	209	'clientName': 'IOS_CREATOR',
	210	'clientVersion': '22.33.101',
	211	'deviceModel': 'iPhone14,3',
	212	'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	213	},
	214	},
	215	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	216	'REQUIRE_JS_PLAYER': False
	217	},
	218	# mweb has 'ultralow' formats
	219	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	220	'mweb': {
	221	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	222	'INNERTUBE_CONTEXT': {
	223	'client': {
	224	'clientName': 'MWEB',
	225	'clientVersion': '2.20220801.00.00',
	226	}
	227	},
	228	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	229	},
	230	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	231	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	232	'tv_embedded': {
	233	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	234	'INNERTUBE_CONTEXT': {
	235	'client': {
	236	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	237	'clientVersion': '2.0',
	238	},
	239	},
	240	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	241	},
	242	}
	243
	244
	245	def _split_innertube_client(client_name):
	246	variant, *base = client_name.rsplit('.', 1)
	247	if base:
	248	return variant, base[0], variant
	249	base, *variant = client_name.split('_', 1)
	250	return client_name, base, variant[0] if variant else None
	251
	252
	253	def short_client_name(client_name):
	254	main, *parts = _split_innertube_client(client_name)[0].replace('embedscreen', 'e_s').split('_')
	255	return join_nonempty(main[:4], ''.join(x[0] for x in parts)).upper()
	256
	257
	258	def build_innertube_clients():
	259	THIRD_PARTY = {
	260	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	261	}
	262	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	263	priority = qualities(BASE_CLIENTS[::-1])
	264
	265	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	266	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	267	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	268	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	269	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	270
	271	_, base_client, variant = _split_innertube_client(client)
	272	ytcfg['priority'] = 10 * priority(base_client)
	273
	274	if not variant:
	275	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	276	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	277	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	278	embedscreen['priority'] -= 3
	279	elif variant == 'embedded':
	280	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	281	ytcfg['priority'] -= 2
	282	else:
	283	ytcfg['priority'] -= 3
	284
	285
	286	build_innertube_clients()
	287
	288
	289	class BadgeType(enum.Enum):
	290	AVAILABILITY_UNLISTED = enum.auto()
	291	AVAILABILITY_PRIVATE = enum.auto()
	292	AVAILABILITY_PUBLIC = enum.auto()
	293	AVAILABILITY_PREMIUM = enum.auto()
	294	AVAILABILITY_SUBSCRIPTION = enum.auto()
	295	LIVE_NOW = enum.auto()
	296
	297
	298	class YoutubeBaseInfoExtractor(InfoExtractor):
	299	"""Provide base functions for Youtube extractors"""
	300
	301	_RESERVED_NAMES = (
	302	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|live\|watch_popup\|clip\|'
	303	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	304	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|source\|'
	305	r'storefront\|oops\|index\|account\|t/terms\|about\|upload\|signin\|logout')
	306
	307	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	308
	309	# _NETRC_MACHINE = 'youtube'
	310
	311	# If True it will raise an error if no login info is provided
	312	_LOGIN_REQUIRED = False
	313
	314	_INVIDIOUS_SITES = (
	315	# invidious-redirect websites
	316	r'(?:www\.)?redirect\.invidious\.io',
	317	r'(?:(?:www\|dev)\.)?invidio\.us',
	318	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	319	r'(?:www\.)?invidious\.pussthecat\.org',
	320	r'(?:www\.)?invidious\.zee\.li',
	321	r'(?:www\.)?invidious\.ethibox\.fr',
	322	r'(?:www\.)?iv\.ggtyler\.dev',
	323	r'(?:www\.)?inv\.vern\.i2p',
	324	r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
	325	r'(?:www\.)?inv\.riverside\.rocks',
	326	r'(?:www\.)?invidious\.silur\.me',
	327	r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
	328	r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
	329	r'(?:www\.)?invidious\.slipfox\.xyz',
	330	r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
	331	r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
	332	r'(?:www\.)?invidious\.tiekoetter\.com',
	333	r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
	334	r'(?:www\.)?invidious\.nerdvpn\.de',
	335	r'(?:www\.)?invidious\.weblibre\.org',
	336	r'(?:www\.)?inv\.odyssey346\.dev',
	337	r'(?:www\.)?invidious\.dhusch\.de',
	338	r'(?:www\.)?iv\.melmac\.space',
	339	r'(?:www\.)?watch\.thekitty\.zone',
	340	r'(?:www\.)?invidious\.privacydev\.net',
	341	r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
	342	r'(?:www\.)?invidious\.drivet\.xyz',
	343	r'(?:www\.)?vid\.priv\.au',
	344	r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
	345	r'(?:www\.)?inv\.vern\.cc',
	346	r'(?:www\.)?invidious\.esmailelbob\.xyz',
	347	r'(?:www\.)?invidious\.sethforprivacy\.com',
	348	r'(?:www\.)?yt\.oelrichsgarcia\.de',
	349	r'(?:www\.)?yt\.artemislena\.eu',
	350	r'(?:www\.)?invidious\.flokinet\.to',
	351	r'(?:www\.)?invidious\.baczek\.me',
	352	r'(?:www\.)?y\.com\.sb',
	353	r'(?:www\.)?invidious\.epicsite\.xyz',
	354	r'(?:www\.)?invidious\.lidarshield\.cloud',
	355	r'(?:www\.)?yt\.funami\.tech',
	356	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	357	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	358	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	359	# youtube-dl invidious instances list
	360	r'(?:(?:www\|no)\.)?invidiou\.sh',
	361	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	362	r'(?:www\.)?invidious\.kabi\.tk',
	363	r'(?:www\.)?invidious\.mastodon\.host',
	364	r'(?:www\.)?invidious\.zapashcanon\.fr',
	365	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	366	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	367	r'(?:www\.)?invidious\.himiko\.cloud',
	368	r'(?:www\.)?invidious\.reallyancient\.tech',
	369	r'(?:www\.)?invidious\.tube',
	370	r'(?:www\.)?invidiou\.site',
	371	r'(?:www\.)?invidious\.site',
	372	r'(?:www\.)?invidious\.xyz',
	373	r'(?:www\.)?invidious\.nixnet\.xyz',
	374	r'(?:www\.)?invidious\.048596\.xyz',
	375	r'(?:www\.)?invidious\.drycat\.fr',
	376	r'(?:www\.)?inv\.skyn3t\.in',
	377	r'(?:www\.)?tube\.poal\.co',
	378	r'(?:www\.)?tube\.connect\.cafe',
	379	r'(?:www\.)?vid\.wxzm\.sx',
	380	r'(?:www\.)?vid\.mint\.lgbt',
	381	r'(?:www\.)?vid\.puffyan\.us',
	382	r'(?:www\.)?yewtu\.be',
	383	r'(?:www\.)?yt\.elukerio\.org',
	384	r'(?:www\.)?yt\.lelux\.fi',
	385	r'(?:www\.)?invidious\.ggc-project\.de',
	386	r'(?:www\.)?yt\.maisputain\.ovh',
	387	r'(?:www\.)?ytprivate\.com',
	388	r'(?:www\.)?invidious\.13ad\.de',
	389	r'(?:www\.)?invidious\.toot\.koeln',
	390	r'(?:www\.)?invidious\.fdn\.fr',
	391	r'(?:www\.)?watch\.nettohikari\.com',
	392	r'(?:www\.)?invidious\.namazso\.eu',
	393	r'(?:www\.)?invidious\.silkky\.cloud',
	394	r'(?:www\.)?invidious\.exonip\.de',
	395	r'(?:www\.)?invidious\.riverside\.rocks',
	396	r'(?:www\.)?invidious\.blamefran\.net',
	397	r'(?:www\.)?invidious\.moomoo\.de',
	398	r'(?:www\.)?ytb\.trom\.tf',
	399	r'(?:www\.)?yt\.cyberhost\.uk',
	400	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	401	r'(?:www\.)?qklhadlycap4cnod\.onion',
	402	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	403	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	404	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	405	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	406	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	407	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	408	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	409	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	410	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	411	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	412	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	413	r'(?:www\.)?piped\.kavin\.rocks',
	414	r'(?:www\.)?piped\.tokhmi\.xyz',
	415	r'(?:www\.)?piped\.syncpundit\.io',
	416	r'(?:www\.)?piped\.mha\.fi',
	417	r'(?:www\.)?watch\.whatever\.social',
	418	r'(?:www\.)?piped\.garudalinux\.org',
	419	r'(?:www\.)?piped\.rivo\.lol',
	420	r'(?:www\.)?piped-libre\.kavin\.rocks',
	421	r'(?:www\.)?yt\.jae\.fi',
	422	r'(?:www\.)?piped\.mint\.lgbt',
	423	r'(?:www\.)?il\.ax',
	424	r'(?:www\.)?piped\.esmailelbob\.xyz',
	425	r'(?:www\.)?piped\.projectsegfau\.lt',
	426	r'(?:www\.)?piped\.privacydev\.net',
	427	r'(?:www\.)?piped\.palveluntarjoaja\.eu',
	428	r'(?:www\.)?piped\.smnz\.de',
	429	r'(?:www\.)?piped\.adminforge\.de',
	430	r'(?:www\.)?watch\.whatevertinfoil\.de',
	431	r'(?:www\.)?piped\.qdi\.fi',
	432	r'(?:www\.)?piped\.video',
	433	r'(?:www\.)?piped\.aeong\.one',
	434	r'(?:www\.)?piped\.moomoo\.me',
	435	r'(?:www\.)?piped\.chauvet\.pro',
	436	r'(?:www\.)?watch\.leptons\.xyz',
	437	r'(?:www\.)?pd\.vern\.cc',
	438	r'(?:www\.)?piped\.hostux\.net',
	439	r'(?:www\.)?piped\.lunar\.icu',
	440	# Hyperpipe instances from https://hyperpipe.codeberg.page/
	441	r'(?:www\.)?hyperpipe\.surge\.sh',
	442	r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
	443	r'(?:www\.)?listen\.whatever\.social',
	444	r'(?:www\.)?music\.adminforge\.de',
	445	)
	446
	447	# extracted from account/account_menu ep
	448	# XXX: These are the supported YouTube UI and API languages,
	449	# which is slightly different from languages supported for translation in YouTube studio
	450	_SUPPORTED_LANG_CODES = [
	451	'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
	452	'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
	453	'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
	454	'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
	455	'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
	456	'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
	457	]
	458
	459	_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
	460
	461	@functools.cached_property
	462	def _preferred_lang(self):
	463	"""
	464	Returns a language code supported by YouTube for the user preferred language.
	465	Returns None if no preferred language set.
	466	"""
	467	preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
	468	if not preferred_lang:
	469	return
	470	if preferred_lang not in self._SUPPORTED_LANG_CODES:
	471	raise ExtractorError(
	472	f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
	473	expected=True)
	474	elif preferred_lang != 'en':
	475	self.report_warning(
	476	f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
	477	return preferred_lang
	478
	479	def _initialize_consent(self):
	480	cookies = self._get_cookies('https://www.youtube.com/')
	481	if cookies.get('__Secure-3PSID'):
	482	return
	483	consent_id = None
	484	consent = cookies.get('CONSENT')
	485	if consent:
	486	if 'YES' in consent.value:
	487	return
	488	consent_id = self._search_regex(
	489	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	490	if not consent_id:
	491	consent_id = random.randint(100, 999)
	492	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	493
	494	def _initialize_pref(self):
	495	cookies = self._get_cookies('https://www.youtube.com/')
	496	pref_cookie = cookies.get('PREF')
	497	pref = {}
	498	if pref_cookie:
	499	try:
	500	pref = dict(urllib.parse.parse_qsl(pref_cookie.value))

1

import base64

import calendar

import collections

import copy

import datetime

import enum

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

22

from .openload import PhantomJSwrapper

23

from ..compat import functools

24

from ..jsinterp import JSInterpreter

25

from ..utils import (

NO_DEFAULT,

ExtractorError,

LazyList,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

filter_dict,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'

71

# any clients starting with _ cannot be explicitly requested by the user

72

INNERTUBE_CLIENTS = {

73

'web': {

74

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

75

'INNERTUBE_CONTEXT': {

76

'client': {

77

'clientName': 'WEB',

78

'clientVersion': '2.20220801.00.00',

79

}

80

},

81

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

82

},

83

'web_embedded': {

84

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

85

'INNERTUBE_CONTEXT': {

86

'client': {

87

'clientName': 'WEB_EMBEDDED_PLAYER',

88

'clientVersion': '1.20220731.00.00',

89

},

90

},

91

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

92

},

93

'web_music': {

94

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

95

'INNERTUBE_HOST': 'music.youtube.com',

96

'INNERTUBE_CONTEXT': {

97

'client': {

98

'clientName': 'WEB_REMIX',

99

'clientVersion': '1.20220727.01.00',

100

}

101

},

102

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

103

},

104

'web_creator': {

105

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

106

'INNERTUBE_CONTEXT': {

107

'client': {

108

'clientName': 'WEB_CREATOR',

109

'clientVersion': '1.20220726.00.00',

110

}

111

},

112

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

113

},

114

'android': {

115

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

116

'INNERTUBE_CONTEXT': {

117

'client': {

118

'clientName': 'ANDROID',

119

'clientVersion': '17.31.35',

120

'androidSdkVersion': 30,

121

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

122

}

123

},

124

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

125

'REQUIRE_JS_PLAYER': False

126

},

127

'android_embedded': {

128

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

129

'INNERTUBE_CONTEXT': {

130

'client': {

131

'clientName': 'ANDROID_EMBEDDED_PLAYER',

132

'clientVersion': '17.31.35',

133

'androidSdkVersion': 30,

134

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

135

},

136

},

137

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

138

'REQUIRE_JS_PLAYER': False

139

},

140

'android_music': {

141

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

142

'INNERTUBE_CONTEXT': {

143

'client': {

144

'clientName': 'ANDROID_MUSIC',

145

'clientVersion': '5.16.51',

146

'androidSdkVersion': 30,

147

'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'

148

}

149

},

150

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

151

'REQUIRE_JS_PLAYER': False

152

},

153

'android_creator': {

154

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

155

'INNERTUBE_CONTEXT': {

156

'client': {

157

'clientName': 'ANDROID_CREATOR',

158

'clientVersion': '22.30.100',

159

'androidSdkVersion': 30,

160

'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'

161

},

162

},

163

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

164

'REQUIRE_JS_PLAYER': False

165

},

166

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

167

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

168

'ios': {

169

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

170

'INNERTUBE_CONTEXT': {

171

'client': {

172

'clientName': 'IOS',

173

'clientVersion': '17.33.2',

174

'deviceModel': 'iPhone14,3',

175

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

176

}

177

},

178

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

179

'REQUIRE_JS_PLAYER': False

180

},

181

'ios_embedded': {

182

'INNERTUBE_CONTEXT': {

183

'client': {

184

'clientName': 'IOS_MESSAGES_EXTENSION',

185

'clientVersion': '17.33.2',

186

'deviceModel': 'iPhone14,3',

187

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

188

},

189

},

190

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

191

'REQUIRE_JS_PLAYER': False

192

},

193

'ios_music': {

194

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

195

'INNERTUBE_CONTEXT': {

196

'client': {

197

'clientName': 'IOS_MUSIC',

198

'clientVersion': '5.21',

199

'deviceModel': 'iPhone14,3',

200

'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

201

},

202

},

203

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

204

'REQUIRE_JS_PLAYER': False

205

},

206

'ios_creator': {

207

'INNERTUBE_CONTEXT': {

208

'client': {

209

'clientName': 'IOS_CREATOR',

210

'clientVersion': '22.33.101',

211

'deviceModel': 'iPhone14,3',

212

'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

213

},

214

},

215

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

216

'REQUIRE_JS_PLAYER': False

217

},

218

# mweb has 'ultralow' formats

219

# See: https://github.com/yt-dlp/yt-dlp/pull/557

220

'mweb': {

221

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

222

'INNERTUBE_CONTEXT': {

223

'client': {

224

'clientName': 'MWEB',

225

'clientVersion': '2.20220801.00.00',

226

}

227

},

228

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

229

},

230

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

231

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

232

'tv_embedded': {

233

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

234

'INNERTUBE_CONTEXT': {

235

'client': {

236

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

237

'clientVersion': '2.0',

238

},

239

},

240

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):
    """Break an Innertube client identifier into ``(name, base, variant)``.

    Two spellings are understood:

    * ``"<variant>.<base>"`` -- split on the last dot.  The text before the
      dot is returned both as the name and as the variant, the text after
      it as the base.
    * ``"<base>_<variant>"`` or a plain ``"<base>"`` -- split on the first
      underscore.  The full identifier is returned as the name; the variant
      is ``None`` when the identifier contains no underscore.
    """
    head, dot, tail = client_name.rpartition('.')
    if dot:
        return head, tail, head
    base, underscore, variant = client_name.partition('_')
    # An identifier ending in '_' yields an empty-string variant, not None.
    return client_name, base, variant if underscore else None

251

252

253

def short_client_name(client_name):
    """Build a compact upper-case label for an Innertube client identifier.

    The name component reported by ``_split_innertube_client`` is split on
    underscores, after ``embedscreen`` has been expanded to ``e_s`` so that
    it contributes two initials.  The label combines the first four
    characters of the leading segment with the initials of the remaining
    segments (via ``join_nonempty``) and is upper-cased.
    """
    name = _split_innertube_client(client_name)[0]
    main, *parts = name.replace('embedscreen', 'e_s').split('_')
    # Slice rather than index: an empty segment (doubled or trailing
    # underscore) contributes no initial instead of raising IndexError.
    initials = ''.join(part[:1] for part in parts)
    return join_nonempty(main[:4], initials).upper()

256

257

258

def build_innertube_clients():
    """Fill in defaults and priorities for every ``INNERTUBE_CLIENTS`` entry.

    ``INNERTUBE_CLIENTS`` is mutated in place.  For each client present when
    the function is called:

    * missing ``INNERTUBE_API_KEY``, ``INNERTUBE_HOST``, ``REQUIRE_JS_PLAYER``
      and the context's ``hl`` are set to defaults;
    * ``priority`` is set to ten times the rank of the base client, minus a
      penalty that depends on the variant;
    * a client without a variant also gets a derived ``<client>_embedscreen``
      entry whose context has ``clientScreen`` set to ``EMBED`` and a
      ``thirdParty`` section;
    * an ``embedded`` variant gets the ``thirdParty`` section itself.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    # NOTE(review): qualities() presumably ranks an id by its position in the
    # given sequence, so reversing makes 'android' rank highest -- confirm.
    priority = qualities(base_clients[::-1])

    # Iterate over a snapshot: the loop adds '<client>_embedscreen' entries.
    for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
        ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(client)
        ytcfg['priority'] = 10 * priority(base_client)

        if not variant:
            embedscreen = copy.deepcopy(ytcfg)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            # Each config gets its own dict so that editing one client's
            # thirdParty section cannot leak into the others.
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = dict(third_party)
            embedscreen['priority'] -= 3
            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen
        elif variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = dict(third_party)
            ytcfg['priority'] -= 2
        else:
            ytcfg['priority'] -= 3

284

285

286

build_innertube_clients()

287

288

289

class BadgeType(enum.Enum):
    """Symbolic names for the badge kinds used by the YouTube extractors.

    Member values are assigned by ``enum.auto()`` and carry no meaning of
    their own; refer to members by name.
    """

    # Availability badges
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()
    # Live-status badge
    LIVE_NOW = enum.auto()

296

297

298

class YoutubeBaseInfoExtractor(InfoExtractor):

299

"""Provide base functions for Youtube extractors"""

_RESERVED_NAMES = (

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

308

309

# _NETRC_MACHINE = 'youtube'

310

311

# If True it will raise an error if no login info is provided

312

_LOGIN_REQUIRED = False

313

314

_INVIDIOUS_SITES = (

315

# invidious-redirect websites

316

r'(?:www\.)?redirect\.invidious\.io',

317

r'(?:(?:www|dev)\.)?invidio\.us',

318

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

319

r'(?:www\.)?invidious\.pussthecat\.org',

320

r'(?:www\.)?invidious\.zee\.li',

321

r'(?:www\.)?invidious\.ethibox\.fr',

322

r'(?:www\.)?iv\.ggtyler\.dev',

323

r'(?:www\.)?inv\.vern\.i2p',

324

r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',

325

r'(?:www\.)?inv\.riverside\.rocks',

326

r'(?:www\.)?invidious\.silur\.me',

327

r'(?:www\.)?inv\.bp\.projectsegfau\.lt',

328

r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',

329

r'(?:www\.)?invidious\.slipfox\.xyz',

330

r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',

331

r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',

332

r'(?:www\.)?invidious\.tiekoetter\.com',

333

r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',

334

r'(?:www\.)?invidious\.nerdvpn\.de',

335

r'(?:www\.)?invidious\.weblibre\.org',

336

r'(?:www\.)?inv\.odyssey346\.dev',

337

r'(?:www\.)?invidious\.dhusch\.de',

338

r'(?:www\.)?iv\.melmac\.space',

339

r'(?:www\.)?watch\.thekitty\.zone',

340

r'(?:www\.)?invidious\.privacydev\.net',

341

r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',

342

r'(?:www\.)?invidious\.drivet\.xyz',

343

r'(?:www\.)?vid\.priv\.au',

344

r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',

345

r'(?:www\.)?inv\.vern\.cc',

346

r'(?:www\.)?invidious\.esmailelbob\.xyz',

347

r'(?:www\.)?invidious\.sethforprivacy\.com',

348

r'(?:www\.)?yt\.oelrichsgarcia\.de',

349

r'(?:www\.)?yt\.artemislena\.eu',

350

r'(?:www\.)?invidious\.flokinet\.to',

351

r'(?:www\.)?invidious\.baczek\.me',

352

r'(?:www\.)?y\.com\.sb',

353

r'(?:www\.)?invidious\.epicsite\.xyz',

354

r'(?:www\.)?invidious\.lidarshield\.cloud',

355

r'(?:www\.)?yt\.funami\.tech',

356

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

357

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

358

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

359

# youtube-dl invidious instances list

360

r'(?:(?:www|no)\.)?invidiou\.sh',

361

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

362

r'(?:www\.)?invidious\.kabi\.tk',

363

r'(?:www\.)?invidious\.mastodon\.host',

364

r'(?:www\.)?invidious\.zapashcanon\.fr',

365

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

366

r'(?:www\.)?invidious\.tinfoil-hat\.net',

367

r'(?:www\.)?invidious\.himiko\.cloud',

368

r'(?:www\.)?invidious\.reallyancient\.tech',

369

r'(?:www\.)?invidious\.tube',

370

r'(?:www\.)?invidiou\.site',

371

r'(?:www\.)?invidious\.site',

372

r'(?:www\.)?invidious\.xyz',

373

r'(?:www\.)?invidious\.nixnet\.xyz',

374

r'(?:www\.)?invidious\.048596\.xyz',

375

r'(?:www\.)?invidious\.drycat\.fr',

376

r'(?:www\.)?inv\.skyn3t\.in',

377

r'(?:www\.)?tube\.poal\.co',

378

r'(?:www\.)?tube\.connect\.cafe',

379

r'(?:www\.)?vid\.wxzm\.sx',

380

r'(?:www\.)?vid\.mint\.lgbt',

381

r'(?:www\.)?vid\.puffyan\.us',

382

r'(?:www\.)?yewtu\.be',

383

r'(?:www\.)?yt\.elukerio\.org',

384

r'(?:www\.)?yt\.lelux\.fi',

385

r'(?:www\.)?invidious\.ggc-project\.de',

386

r'(?:www\.)?yt\.maisputain\.ovh',

387

r'(?:www\.)?ytprivate\.com',

388

r'(?:www\.)?invidious\.13ad\.de',

389

r'(?:www\.)?invidious\.toot\.koeln',

390

r'(?:www\.)?invidious\.fdn\.fr',

391

r'(?:www\.)?watch\.nettohikari\.com',

392

r'(?:www\.)?invidious\.namazso\.eu',

393

r'(?:www\.)?invidious\.silkky\.cloud',

394

r'(?:www\.)?invidious\.exonip\.de',

395

r'(?:www\.)?invidious\.riverside\.rocks',

396

r'(?:www\.)?invidious\.blamefran\.net',

397

r'(?:www\.)?invidious\.moomoo\.de',

398

r'(?:www\.)?ytb\.trom\.tf',

399

r'(?:www\.)?yt\.cyberhost\.uk',

400

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

401

r'(?:www\.)?qklhadlycap4cnod\.onion',

402

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

403

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

404

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

405

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

406

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

407

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

408

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

409

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

410

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

411

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

412

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

413

r'(?:www\.)?piped\.kavin\.rocks',

414

r'(?:www\.)?piped\.tokhmi\.xyz',

415

r'(?:www\.)?piped\.syncpundit\.io',

416

r'(?:www\.)?piped\.mha\.fi',

417

r'(?:www\.)?watch\.whatever\.social',

418

r'(?:www\.)?piped\.garudalinux\.org',

419

r'(?:www\.)?piped\.rivo\.lol',

420

r'(?:www\.)?piped-libre\.kavin\.rocks',

421

r'(?:www\.)?yt\.jae\.fi',

422

r'(?:www\.)?piped\.mint\.lgbt',

423

r'(?:www\.)?il\.ax',

424

r'(?:www\.)?piped\.esmailelbob\.xyz',

425

r'(?:www\.)?piped\.projectsegfau\.lt',

426

r'(?:www\.)?piped\.privacydev\.net',

427

r'(?:www\.)?piped\.palveluntarjoaja\.eu',

428

r'(?:www\.)?piped\.smnz\.de',

429

r'(?:www\.)?piped\.adminforge\.de',

430

r'(?:www\.)?watch\.whatevertinfoil\.de',

431

r'(?:www\.)?piped\.qdi\.fi',

432

r'(?:www\.)?piped\.video',

433

r'(?:www\.)?piped\.aeong\.one',

434

r'(?:www\.)?piped\.moomoo\.me',

435

r'(?:www\.)?piped\.chauvet\.pro',

436

r'(?:www\.)?watch\.leptons\.xyz',

437

r'(?:www\.)?pd\.vern\.cc',

438

r'(?:www\.)?piped\.hostux\.net',

439

r'(?:www\.)?piped\.lunar\.icu',

440

# Hyperpipe instances from https://hyperpipe.codeberg.page/

441

r'(?:www\.)?hyperpipe\.surge\.sh',

442

r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',

443

r'(?:www\.)?listen\.whatever\.social',

444

r'(?:www\.)?music\.adminforge\.de',

445

)

446

447

# extracted from account/account_menu ep

448

# XXX: These are the supported YouTube UI and API languages,

449

# which is slightly different from languages supported for translation in YouTube studio

450

_SUPPORTED_LANG_CODES = [

451

'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',

452

'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',

453

'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',

454

'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',

455

'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',

456

'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'

457

]

458

459

_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}

460

461

@functools.cached_property
def _preferred_lang(self):
    """
    Language code requested through the Youtube `lang` extractor argument.

    Evaluates to None when the argument is unset or empty.
    Raises an expected ExtractorError when the code is not listed in
    _SUPPORTED_LANG_CODES (the comparison is case-sensitive), and emits a
    warning for every accepted code other than 'en'.
    """
    requested = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not requested:
        return None

    if requested not in self._SUPPORTED_LANG_CODES:
        supported = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=', ')
        raise ExtractorError(
            f'Unsupported language code: {requested}. Supported language codes (case-sensitive): {supported}.',
            expected=True)

    if requested != 'en':
        self.report_warning(
            f'Preferring "{requested}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return requested

478

479

def _initialize_consent(self):

480

cookies = self._get_cookies('https://www.youtube.com/')

481

if cookies.get('__Secure-3PSID'):

482

return

483

consent_id = None

484

consent = cookies.get('CONSENT')

485

if consent:

486

if 'YES' in consent.value:

487

return

488

consent_id = self._search_regex(

489

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

490

if not consent_id:

491

consent_id = random.randint(100, 999)

492

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

493

494

def _initialize_pref(self):

495

cookies = self._get_cookies('https://www.youtube.com/')

496

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(urllib.parse.parse_qsl(pref_cookie.value))

501

except ValueError:

502

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

503

pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})

504

self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))

505

506

def _real_initialize(self):

507

self._initialize_pref()

508

self._initialize_consent()

509

self._check_login_required()

510

511

def _check_login_required(self):

512

if self._LOGIN_REQUIRED and not self._cookies_passed:

513

self.raise_login_required('Login details are needed to download this content', method='cookies')

514

515

# Matches the left-hand side of the ytInitialData assignment, either as a bare
# name or as window["ytInitialData"]; used as the start pattern in extract_yt_initial_data()
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

516

# Matches the left-hand side of the ytInitialPlayerResponse assignment
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

517

518

def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for `client`, so callers may mutate it freely."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)

520

521

def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the INNERTUBE_CLIENTS entry for `client`."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']

523

524

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get() on `ytcfg`, falling back to the default ytcfg of `default_client`.

    The fallback is used whenever the lookup on `ytcfg` yields a falsy value.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

528

529

def _extract_client_name(self, ytcfg, default_client='web'):

530

return self._ytcfg_get_safe(

531

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

532

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)

533

534

def _extract_client_version(self, ytcfg, default_client='web'):

535

return self._ytcfg_get_safe(

536

ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],

537

lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)

538

539

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname to talk to.

    Priority: the `innertube_host` extractor argument, then `req_api_hostname`,
    then the host of `default_client` ('web' when not given).
    """
    user_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if user_host:
        return user_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

542

543

def _extract_api_key(self, ytcfg=None, default_client='web'):

544

return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)

545

546

def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the first dict-valued 'INNERTUBE_CONTEXT' found in `ytcfg` or in the
    default config of `default_client`, with language and timezone enforced.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction.
    # NOTE(review): the update is applied to whatever traverse_obj() hands back;
    # presumably that is the very dict stored under context['client'] - confirm.
    client = traverse_obj(context, 'client', expected_type=dict, default={})
    client['hl'] = self._preferred_lang or 'en'
    client['timeZone'] = 'UTC'
    client['utcOffsetMinutes'] = 0
    return context

# Cached SAPISID cookie value used by _generate_sapisidhash_header():
# None = not looked up yet, False = looked up but no usable cookie was found
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

557

time_now = round(time.time())

558

if self._SAPISID is None:

559

yt_cookies = self._get_cookies('https://www.youtube.com')

560

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

561

# See: https://github.com/yt-dlp/yt-dlp/issues/393

562

sapisid_cookie = dict_get(

563

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

564

if sapisid_cookie and sapisid_cookie.value:

565

self._SAPISID = sapisid_cookie.value

566

self.write_debug('Extracted SAPISID cookie')

567

# SAPISID cookie is required if not already present

568

if not yt_cookies.get('SAPISID'):

569

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

570

self._set_cookie(

571

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

572

else:

573

self._SAPISID = False

574

if not self._SAPISID:

575

return None

576

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

577

sapisidhash = hashlib.sha1(

578

f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()

579

return f'SAPISIDHASH {time_now}_{sapisidhash}'

580

581

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` as JSON to the /youtubei/v1/<ep> endpoint and return the parsed response.

    @param context  request context; extracted for `default_client` when falsy
    @param headers  extra headers, applied on top of the generated API headers
    @param api_key  used unless the `innertube_key` extractor argument is set;
                    extracted for `default_client` when both are empty
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    user_key = self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = user_key or api_key or self._extract_api_key(default_client=default_client)
    host = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{host}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})

598

599

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON that follows the ytInitialData assignment (see _YT_INITIAL_DATA_RE)."""
    start_pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(start_pattern, webpage, 'yt initial data', item_id, fatal=fatal)

601

602

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    Returns the first integer 'SESSION_INDEX' found in the given ytcfgs, else None.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    """
    indices = (int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX'])) for cfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):

615

if ytcfg:

616

token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)

if token:

return token

if webpage:

return self._search_regex(

621

r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,

622

'identity token', default=None, fatal=False)

623

624

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated

        datasync_id = try_get(
            source, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                     lambda x: x['DATASYNC_ID']), str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)

652

653

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

656

657

def extract_ytcfg(self, video_id, webpage):
    """
    Parse the object passed to `ytcfg.set({...});` in `webpage`.

    @param video_id  id passed on to _parse_json
    @param webpage   page source; may be empty/None
    @returns         the parsed dict, or {} when the page is empty, the call
                     is not found, or the JSON cannot be parsed
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped to be matched literally;
    # the lazy {.+?} group stops at the first "})" followed by ";".
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_config, video_id, fatal=False) or {}

664

665

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an API request.

    Explicitly passed values take precedence; anything missing is looked up
    in `ytcfg` (or the default config of `default_client`). Authorization and
    X-Origin are only added when a SAPISIDHASH can be generated. The result is
    passed through filter_dict() before being returned.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # a sync id without a known session index falls back to index 0
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return filter_dict(headers)

690

691

def _download_ytcfg(self, client, video_id):

692

url = {

693

'web': 'https://www.youtube.com',

694

'web_music': 'https://music.youtube.com',

695

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(

700

url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')

701

return self.extract_ytcfg(video_id, webpage) or {}

702

703

@staticmethod

704

def _build_api_continuation_query(continuation, ctp=None):

705

query = {

706

'continuation': continuation

707

}

708

# TODO: Inconsistency with clickTrackingParams.

709

# Currently we have a fixed ctp contained within context (from ytcfg)

710

# and a ctp in root query for continuation.

711

if ctp:

712

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData or
    reloadContinuationData; returns None when no continuation token is present.
    """
    data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = data.get('continuation') if data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, data.get('clickTrackingParams'))

727

728

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict
    (continuationCommand.token plus clickTrackingParams).
    Returns None for non-dict input or when the token is missing.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

737

738

@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`.

    The next/reload continuation data is preferred; otherwise the
    continuationItemRenderer entries under contents/items/rows are searched.
    """
    query = cls._extract_next_continuation_data(renderer)
    if not query:
        query = traverse_obj(renderer, (
            ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
            ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
        ), get_all=False, expected_type=cls._extract_continuation_ep_data)
    return query

748

749

@classmethod
def _extract_alerts(cls, data):
    """Yield (alert_type, message) for every alert in data['alerts'] that has both a type and a text."""
    alert_groups = try_get(data, lambda x: x['alerts'], list) or []
    for group in alert_groups:
        if isinstance(group, dict):
            for alert in group.values():
                kind = alert.get('type')
                # the text is only looked up for alerts that carry a type
                text = cls._get_text(alert, 'text') if kind else None
                if text:
                    yield kind, text

761

762

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

763

errors, warnings = [], []

764

for alert_type, alert_message in alerts:

765

if alert_type.lower() == 'error' and fatal:

766

errors.append([alert_type, alert_message])

767

elif alert_message not in self._IGNORED_WARNINGS:

768

warnings.append([alert_type, alert_message])

769

770

for alert_type, alert_message in (warnings + errors[:-1]):

771

self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

772

if errors:

773

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

774

775

def _extract_and_report_alerts(self, data, *args, **kwargs):

776

return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

777

778

def _extract_badges(self, renderer: dict):
    """
    Collect the badges of `renderer` as a list of {'type': BadgeType} dicts.

    Each metadataBadgeRenderer is classified by its icon type, then by its
    style; when neither is known, the (English) label text is searched for a
    known keyword and the first matching keyword decides the badge type.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer')):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # record the type found through the label (badge_type is
                # always empty here) and stop at the first matching keyword
                badges.append({'type': label_badge_type})
                break

    return badges

@staticmethod
def _has_badge(badges, badge_type):
    """True when at least one entry of `badges` has the given 'type'."""
    def is_wanted(_, badge):
        return badge['type'] == badge_type

    return bool(traverse_obj(badges, is_wanted))

821

822

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found under any of the given paths.

    For each candidate object, 'simpleText' is preferred; otherwise the 'text'
    values of its 'runs' (or of the object itself when it is a list) are
    joined, limited to the first `max_runs` runs when given. With no paths,
    `data` itself is inspected. Returns None when nothing is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # a path without `...` or alternative keys yields a single object
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str))
            if joined:
                return joined
    return None

def _get_count(self, data, *path_list):
    """
    Parse a count from the text found under `path_list`.

    parse_count() is tried first; when it gives None, the leading run of
    digits and commas of the whitespace-stripped text is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count

    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(
        self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width'; entries without a valid URL are skipped
    """
    thumbnails = []
    for path in path_list or [()]:
        for entry in traverse_obj(data, (*variadic(path), 'thumbnails', ...)):
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.partition('?')[0]
            thumbnails.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return thumbnails

@staticmethod

def extract_relative_time(relative_time_text):

878

"""

879

Extracts a relative time from string and converts to dt object

880

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

"""

if mobj:

start = mobj.group('start')

885

if start:

886

return datetime_from_str(start)

887

try:

888

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _parse_time_text(self, text):

893

if not text:

894

return

895

dt = self.extract_relative_time(text)

896

timestamp = None

897

if isinstance(dt, datetime.datetime):

898

timestamp = calendar.timegm(dt.timetuple())

899

900

if timestamp is None:

901

timestamp = (

902

unified_timestamp(text) or unified_timestamp(

903

self._search_regex(

904

(r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),

905

text.lower(), 'time text', default=None)))

906

907

if text and timestamp is None and self._preferred_lang in (None, 'en'):

908

self.report_warning(

909

f'Cannot parse localized time text "{text}"', only_once=True)

910

return timestamp

911

912

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API through _call_api() under the RetryManager and return the response.

    Retried: network errors that are not HTTP errors, HTTP errors other than
    403/429, alert errors containing "unknown error", and responses for which
    `check_get_keys` yields nothing. Every other failure is passed to
    _error_or_warning() with `fatal`.
    """
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            cause = e.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            if not isinstance(cause, urllib.error.HTTPError):
                retry.error = e
                continue

            # Surface the error message from a non-HTML error body, if any
            first_bytes = cause.read(512)
            if not is_html(first_bytes):
                error_body = self._webpage_read_content(cause, None, item_id, prefix=first_bytes) or '{}'
                yt_error = try_get(
                    self._parse_json(error_body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)

            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.code in (403, 429):
                return self._error_or_warning(e, fatal=fatal)
            retry.error = e
            continue

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' not in e.msg.lower():
                return self._error_or_warning(e, fatal=fatal)
            retry.error = e
            continue

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response

@staticmethod

def is_music_url(url):

966

return re.match(r'(https?://)?music\.youtube\.com/', url) is not None

967

968

def _extract_video(self, renderer):
    """
    Turn a video renderer into a 'url' type result dict for YoutubeIE.

    Metadata is filled from the renderer and, where missing, from the reel
    (shorts) header renderer. The timestamp is only parsed when the
    `approximate_date` extractor argument of YoutubeTabIE is set.
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not channel_id:
        channel_id = traverse_obj(reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    # live and upcoming videos report viewers under a different field name
    view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
               or self._get_text(reel_header, 'channelTitleText'))
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    timestamp = (
        self._parse_time_text(time_text)
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
        else None)

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'thumbnails': thumbnails,
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_field: view_count,
        'live_status': live_status
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

1053

IE_DESC = 'YouTube'
# Verbose-mode (?x) pattern for single-video URLs; the 11-character video ID
# is captured in the named group `id`.  `%(invidious)s` is filled in below
# with the alternation of YoutubeBaseInfoExtractor._INVIDIOUS_SITES.
# Whitespace and `#` comments inside the literal are ignored by the regex
# engine, so nothing but pattern text and comments may appear in it.
# NOTE(review): the group commented "the various things that can precede the
# ID" has an empty first alternative here (its next alternative starts with
# "or ..."), and as written the pattern cannot match the
# 'https://www.youtube.com/embed/CsmdDsKjzN8' URL listed in _TESTS.  The first
# alternative appears to have been lost - restore it from the upstream file.
_VALID_URL = r"""(?x)^
    (
        (?:https?://|//) # http(s):// or protocol-independent URL
        (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
            (?:www\.)?deturl\.com/www\.youtube\.com|
            (?:www\.)?pwnyoutube\.com|
            (?:www\.)?hooktube\.com|
            (?:www\.)?yourepeat\.com|
            tube\.majestyc\.net|
            %(invidious)s|
            youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
        (?:.*?\#/)? # handle anchor (#/) redirect urls
        (?: # the various things that can precede the ID:
            |(?: # or the v= param in all its forms
                (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                (?:\?|\#!?) # the params delimiter ? or # or #!
                (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
                v=
            )
        ))
        |(?:
            youtu\.be| # just youtu.be/xxxx
            vid\.plus| # or vid.plus/xxxx
            zwearz\.com/watch| # or zwearz.com/watch/xxxx
            %(invidious)s
        )/
        |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
        )
    )? # all until now is optional -> you can pass the naked ID
    (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
    (?(1).+)? # if we found the ID, everything can follow
    (?:\#|$)""" % {
    'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}

_EMBED_REGEX = [

r'''(?x)

(?:

<(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|

data-video-url=|

<embed[^>]+?src=|

embedSWF\(?:\s*|

<object[^>]+data=|

new\s+SWFObject\(

)

(["\'])

(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/

1101

(?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)

1102

\1''',

1103

# https://wordpress.org/plugins/lazy-load-for-videos/

1104

r'''(?xs)

1105

<a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"

1106

\s[^>]*\bclass="[^"]*\blazy-load-youtube''',

1107

]

1108

_RETURN_TYPE = 'video' # XXX: How to handle multifeed?

1109

1110

_PLAYER_INFO_RE = (

1111

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

1112

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

1113

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

1114

)

1115

_formats = {

1116

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

1117

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

1118

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

1119

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

1120

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

1121

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

1122

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

1123

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

1124

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

1125

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

1126

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

1127

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

1128

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

1129

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

1130

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

1131

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

1132

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

1133

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

1138

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

1139

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

1140

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

1141

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

1142

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

1143

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

1144

1145

# Apple HTTP Live Streaming

1146

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

1147

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

1148

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

1149

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

1150

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

1151

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

1152

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

1153

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

1154

1155

# DASH mp4 video

1156

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

1157

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

1158

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

1159

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

1160

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

1161

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

1162

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

1163

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

1164

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

1165

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

1166

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

1167

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

1168

1169

# Dash mp4 audio

1170

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

1171

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

1172

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

1173

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1174

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1175

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

1176

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

1177

1178

# Dash webm

1179

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1180

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1181

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1182

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1183

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1184

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1185

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1186

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1187

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1188

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1189

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1190

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1191

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1192

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1193

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1194

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1195

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1196

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1197

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1198

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1199

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1200

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1201

1202

# Dash webm audio

1203

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1204

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1205

1206

# Dash webm audio with opus inside

1207

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1208

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1209

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1210

1211

# RTMP (unnamed)

1212

'_rtmp': {'protocol': 'rtmp'},

1213

1214

# av01 video only formats sometimes served with "unknown" codecs

1215

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1216

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1217

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1218

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1219

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1220

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1221

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1222

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1223

}

1224

# Fixed tuple of subtitle format identifiers.
# NOTE(review): presumably the formats offered for each subtitle track - the
# code that consumes this tuple is outside this chunk; confirm.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# NOTE(review): looks like a flag read by the extractor base class to turn
# geo-restriction bypass off for this extractor - confirm against InfoExtractor.
_GEO_BYPASS = False

# NOTE(review): presumably the short name under which this extractor is
# registered/listed - confirm against InfoExtractor.
IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1236

'uploader': 'Philipp Hagemeister',

1237

'uploader_id': 'phihag',

1238

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1239

'channel': 'Philipp Hagemeister',

1240

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1241

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1242

'upload_date': '20121002',

1243

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1244

'categories': ['Science & Technology'],

1245

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1250

'playable_in_embed': True,

1251

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1252

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1257

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1262

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1267

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1268

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1269

'uploader': 'SET India',

1270

'uploader_id': 'setindia',

1271

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1272

'age_limit': 18,

1273

},

1274

'skip': 'Private video',

1275

},

1276

{

1277

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1278

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1283

'uploader': 'Philipp Hagemeister',

1284

'uploader_id': 'phihag',

1285

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1286

'channel': 'Philipp Hagemeister',

1287

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1288

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1289

'upload_date': '20121002',

1290

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1291

'categories': ['Science & Technology'],

1292

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1297

'playable_in_embed': True,

1298

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1299

'live_status': 'not_live',

1300

'age_limit': 0,

1301

'comment_count': int,

1302

'channel_follower_count': int

1303

},

1304

'params': {

1305

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1310

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1315

'uploader_id': '8KVIDEO',

1316

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1317

'description': '',

1318

'uploader': '8KVIDEO',

1319

'title': 'UHDTV TEST 8K VIDEO.mp4'

1320

},

1321

'params': {

1322

'youtube_include_dash_manifest': True,

1323

'format': '141',

1324

},

1325

'skip': 'format 141 not served anymore',

1326

},

1327

# DASH manifest with encrypted signature

1328

{

1329

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1334

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1335

'duration': 244,

1336

'uploader': 'AfrojackVEVO',

1337

'uploader_id': 'AfrojackVEVO',

1338

'upload_date': '20131011',

1339

'abr': 129.495,

1340

'like_count': int,

1341

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1342

'playable_in_embed': True,

1343

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1344

'view_count': int,

1345

'track': 'The Spark',

1346

'live_status': 'not_live',

1347

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1348

'channel': 'Afrojack',

1349

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1350

'tags': 'count:19',

1351

'availability': 'public',

1352

'categories': ['Music'],

1353

'age_limit': 0,

1354

'alt_title': 'The Spark',

1355

'channel_follower_count': int

1356

},

1357

'params': {

1358

'youtube_include_dash_manifest': True,

1359

'format': '141/bestaudio[ext=m4a]',

1360

},

1361

},

1362

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1363

{

1364

'note': 'Embed allowed age-gate video',

1365

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1370

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1371

'duration': 142,

1372

'uploader': 'The Witcher',

1373

'uploader_id': 'WitcherGame',

1374

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1375

'upload_date': '20140605',

1376

'age_limit': 18,

1377

'categories': ['Gaming'],

1378

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1379

'availability': 'needs_auth',

1380

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1381

'like_count': int,

1382

'channel': 'The Witcher',

1383

'live_status': 'not_live',

1384

'tags': 'count:17',

1385

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1386

'playable_in_embed': True,

1387

'view_count': int,

1388

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1393

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1398

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1399

'upload_date': '20200408',

1400

'uploader_id': 'FlyingKitty900',

1401

'uploader': 'FlyingKitty',

1402

'age_limit': 18,

1403

'availability': 'needs_auth',

1404

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1405

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1406

'channel': 'FlyingKitty',

1407

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1408

'view_count': int,

1409

'categories': ['Entertainment'],

1410

'live_status': 'not_live',

1411

'tags': ['Flyingkitty', 'godzilla 2'],

1412

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1413

'like_count': int,

1414

'duration': 177,

1415

'playable_in_embed': True,

1416

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1421

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1422

'info_dict': {

1423

'id': 'Tq92D6wQ1mg',

1424

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1425

'ext': 'mp4',

1426

'upload_date': '20191228',

1427

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1428

'uploader': 'Projekt Melody',

1429

'description': 'md5:17eccca93a786d51bc67646756894066',

1430

'age_limit': 18,

1431

'like_count': int,

1432

'availability': 'needs_auth',

1433

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1434

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1435

'view_count': int,

1436

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1437

'channel': 'Projekt Melody',

1438

'live_status': 'not_live',

1439

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1440

'playable_in_embed': True,

1441

'categories': ['Entertainment'],

1442

'duration': 106,

1443

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1444

'comment_count': int,

1445

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1450

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1455

'uploader': 'Herr Lurik',

1456

'uploader_id': 'st3in234',

1457

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1458

'upload_date': '20130730',

1459

'track': 'Such mich find mich',

1460

'age_limit': 0,

1461

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1462

'like_count': int,

1463

'playable_in_embed': False,

1464

'creator': 'OOMPH!',

1465

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1466

'view_count': int,

1467

'alt_title': 'Such mich find mich',

1468

'duration': 210,

1469

'channel': 'Herr Lurik',

1470

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1471

'categories': ['Music'],

1472

'availability': 'public',

1473

'uploader_url': 'http://www.youtube.com/user/st3in234',

1474

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1475

'live_status': 'not_live',

1476

'artist': 'OOMPH!',

1477

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1482

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1483

'only_matching': True,

1484

},

1485

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1486

# YouTube Red ad is not captured for creator

1487

{

1488

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1494

'uploader_id': 'deadmau5',

1495

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1496

'creator': 'deadmau5',

1497

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1498

'uploader': 'deadmau5',

1499

'title': 'Deadmau5 - Some Chords (HD)',

1500

'alt_title': 'Some Chords',

1501

'availability': 'public',

1502

'tags': 'count:14',

1503

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1504

'view_count': int,

1505

'live_status': 'not_live',

1506

'channel': 'deadmau5',

1507

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1508

'like_count': int,

1509

'track': 'Some Chords',

1510

'artist': 'deadmau5',

1511

'playable_in_embed': True,

1512

'age_limit': 0,

1513

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1514

'categories': ['Music'],

1515

'album': 'Some Chords',

1516

'channel_follower_count': int

1517

},

1518

'expected_warnings': [

1519

'DASH manifest missing',

1520

]

1521

},

1522

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1523

{

1524

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1530

'uploader_id': 'olympic',

1531

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1532

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1533

'uploader': 'Olympics',

1534

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1535

'like_count': int,

1536

'release_timestamp': 1343767800,

1537

'playable_in_embed': True,

1538

'categories': ['Sports'],

1539

'release_date': '20120731',

1540

'channel': 'Olympics',

1541

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1542

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1543

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1544

'age_limit': 0,

1545

'availability': 'public',

1546

'live_status': 'was_live',

1547

'view_count': int,

1548

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1549

'channel_follower_count': int

1550

},

1551

'params': {

1552

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1562

'duration': 85,

1563

'upload_date': '20110310',

1564

'uploader_id': 'AllenMeow',

1565

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1566

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1567

'uploader': '孫ᄋᄅ',

1568

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1569

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1574

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1575

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1576

'view_count': int,

1577

'categories': ['People & Blogs'],

1578

'like_count': int,

1579

'live_status': 'not_live',

1580

'availability': 'unlisted',

1581

'comment_count': int,

1582

'channel_follower_count': int

1583

},

1584

},

1585

# url_encoded_fmt_stream_map is empty string

1586

{

1587

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1592

'description': '',

1593

'upload_date': '20150404',

1594

'uploader_id': 'spbelect',

1595

'uploader': 'Наблюдатели Петербурга',

1596

},

1597

'params': {

1598

'skip_download': 'requires avconv',

1599

},

1600

'skip': 'This live event has ended.',

1601

},

1602

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1603

{

1604

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1609

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1610

'duration': 220,

1611

'upload_date': '20150625',

1612

'uploader_id': 'dorappi2000',

1613

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1614

'uploader': 'dorappi2000',

1615

'formats': 'mincount:31',

1616

},

1617

'skip': 'not actual anymore',

1618

},

1619

# DASH manifest with segment_list

1620

{

1621

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1622

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1627

'uploader': 'Airtek',

1628

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1629

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1630

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1631

},

1632

'params': {

1633

'youtube_include_dash_manifest': True,

1634

'format': '135', # bestvideo

1635

},

1636

'skip': 'This live event has ended.',

1637

},

1638

{

1639

# Multifeed videos (multiple cameras), URL can be of any Camera

1640

'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',

1641

'info_dict': {

1642

'id': 'zaPI8MvL8pg',

1643

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',

1644

'description': 'md5:563ccbc698b39298481ca3c571169519',

},

'playlist': [{

'info_dict': {

'id': 'j5yGuxZ8lLU',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',

1651

'uploader': 'WiiLikeToPlay',

1652

'description': 'md5:563ccbc698b39298481ca3c571169519',

1653

'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',

1654

'duration': 10120,

1655

'channel_follower_count': int,

1656

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1657

'availability': 'public',

1658

'playable_in_embed': True,

1659

'upload_date': '20131105',

1660

'uploader_id': 'WiiRikeToPray',

1661

'categories': ['Gaming'],

1662

'live_status': 'was_live',

1663

'tags': 'count:24',

1664

'release_timestamp': 1383701910,

1665

'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',

1666

'comment_count': int,

1667

'age_limit': 0,

1668

'like_count': int,

1669

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1670

'channel': 'WiiLikeToPlay',

1671

'view_count': int,

1672

'release_date': '20131106',

},

}, {

'info_dict': {

'id': 'zaPI8MvL8pg',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',

1679

'uploader_id': 'WiiRikeToPray',

1680

'availability': 'public',

1681

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1682

'channel': 'WiiLikeToPlay',

1683

'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',

1684

'channel_follower_count': int,

1685

'description': 'md5:563ccbc698b39298481ca3c571169519',

'duration': 10108,

'age_limit': 0,

'like_count': int,

'tags': 'count:24',

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1691

'uploader': 'WiiLikeToPlay',

1692

'release_timestamp': 1383701915,

1693

'comment_count': int,

1694

'upload_date': '20131105',

1695

'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',

1696

'release_date': '20131106',

1697

'playable_in_embed': True,

1698

'live_status': 'was_live',

1699

'categories': ['Gaming'],

'view_count': int,

},

}, {

'info_dict': {

'id': 'R7r3vfO7Hao',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',

1707

'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',

1708

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1709

'like_count': int,

1710

'availability': 'public',

1711

'playable_in_embed': True,

1712

'upload_date': '20131105',

1713

'description': 'md5:563ccbc698b39298481ca3c571169519',

1714

'uploader_id': 'WiiRikeToPray',

1715

'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',

1716

'channel_follower_count': int,

1717

'tags': 'count:24',

1718

'release_date': '20131106',

1719

'uploader': 'WiiLikeToPlay',

1720

'comment_count': int,

1721

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1722

'channel': 'WiiLikeToPlay',

1723

'categories': ['Gaming'],

1724

'release_timestamp': 1383701914,

1725

'live_status': 'was_live',

'age_limit': 0,

'duration': 10128,

'view_count': int,

},

}],

'params': {'skip_download': True},

1732

},

1733

{

1734

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1735

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1736

'info_dict': {

1737

'id': 'gVfLd0zydlo',

1738

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1739

},

1740

'playlist_count': 2,

1741

'skip': 'Not multifeed anymore',

1742

},

1743

{

1744

'url': 'https://vid.plus/FlRa-iH7PGw',

1745

'only_matching': True,

1746

},

1747

{

1748

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1749

'only_matching': True,

1750

},

1751

{

1752

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1753

# Also tests cut-off URL expansion in video description (see

1754

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1755

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1756

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1761

'alt_title': 'Dark Walk',

1762

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1763

'duration': 133,

1764

'upload_date': '20151119',

1765

'uploader_id': 'IronSoulElf',

1766

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1767

'uploader': 'IronSoulElf',

1768

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1769

'track': 'Dark Walk',

1770

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1771

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1772

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1773

'categories': ['Film & Animation'],

1774

'view_count': int,

1775

'live_status': 'not_live',

1776

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1777

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1778

'tags': 'count:13',

1779

'availability': 'public',

1780

'channel': 'IronSoulElf',

1781

'playable_in_embed': True,

1782

'like_count': int,

1783

'age_limit': 0,

1784

'channel_follower_count': int

1785

},

1786

'params': {

1787

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1792

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1793

'only_matching': True,

1794

},

1795

{

1796

# Video with yt:stretch=17:0

1797

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1802

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1803

'upload_date': '20151107',

1804

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1805

'uploader': 'CH GAMER DROID',

1806

},

1807

'params': {

1808

'skip_download': True,

1809

},

1810

'skip': 'This video does not exist.',

1811

},

1812

{

1813

# Video with incomplete 'yt:stretch=16:'

1814

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1815

'only_matching': True,

1816

},

1817

{

1818

# Video licensed under Creative Commons

1819

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1824

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1825

'duration': 721,

1826

'upload_date': '20150128',

1827

'uploader_id': 'BerkmanCenter',

1828

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1829

'uploader': 'The Berkman Klein Center for Internet & Society',

1830

'license': 'Creative Commons Attribution license (reuse allowed)',

1831

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1832

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1833

'like_count': int,

1834

'age_limit': 0,

1835

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1836

'channel': 'The Berkman Klein Center for Internet & Society',

1837

'availability': 'public',

1838

'view_count': int,

1839

'categories': ['Education'],

1840

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1841

'live_status': 'not_live',

1842

'playable_in_embed': True,

1843

'comment_count': int,

1844

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

# Channel-like uploader_url

1853

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1858

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1859

'duration': 4060,

1860

'upload_date': '20151120',

1861

'uploader': 'Bernie Sanders',

1862

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1863

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1864

'license': 'Creative Commons Attribution license (reuse allowed)',

1865

'playable_in_embed': True,

1866

'tags': 'count:12',

1867

'like_count': int,

1868

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1869

'age_limit': 0,

1870

'availability': 'public',

1871

'categories': ['News & Politics'],

1872

'channel': 'Bernie Sanders',

1873

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1874

'view_count': int,

1875

'live_status': 'not_live',

1876

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1877

'comment_count': int,

1878

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1887

'only_matching': True,

1888

},

1889

{

1890

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1891

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1892

'only_matching': True,

1893

},

1894

{

1895

# Rental video preview

1896

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1901

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1902

'upload_date': '20150811',

1903

'uploader': 'FlixMatrix',

1904

'uploader_id': 'FlixMatrixKaravan',

1905

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1906

'license': 'Standard YouTube License',

1907

},

1908

'params': {

1909

'skip_download': True,

1910

},

1911

'skip': 'This video is not available.',

1912

},

1913

{

1914

# YouTube Red video with episode data

1915

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1920

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1921

'duration': 2085,

1922

'upload_date': '20170118',

1923

'uploader': 'Vsauce',

1924

'uploader_id': 'Vsauce',

1925

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1926

'series': 'Mind Field',

1927

'season_number': 1,

1928

'episode_number': 1,

1929

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1930

'tags': 'count:12',

1931

'view_count': int,

1932

'availability': 'public',

1933

'age_limit': 0,

1934

'channel': 'Vsauce',

1935

'episode': 'Episode 1',

1936

'categories': ['Entertainment'],

1937

'season': 'Season 1',

1938

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1939

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1940

'like_count': int,

1941

'playable_in_embed': True,

1942

'live_status': 'not_live',

1943

'channel_follower_count': int

1944

},

1945

'params': {

1946

'skip_download': True,

1947

},

1948

'expected_warnings': [

1949

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1954

# as inappropriate or offensive to some audiences.

1955

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1960

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1961

'duration': 965,

1962

'upload_date': '20140124',

1963

'uploader': 'New Century Foundation',

1964

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1965

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1966

},

1967

'params': {

1968

'skip_download': True,

1969

},

1970

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1975

'only_matching': True,

1976

},

1977

{

1978

# geo restricted to JP

1979

'url': 'sJL6WA-aGkQ',

1980

'only_matching': True,

1981

},

1982

{

1983

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1984

'only_matching': True,

1985

},

1986

{

1987

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1988

'only_matching': True,

1989

},

1990

{

1991

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1992

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1993

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1998

'only_matching': True,

1999

},

2000

{

2001

# Video with unsupported adaptive stream type formats

2002

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

2007

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

2008

'duration': 433,

2009

'upload_date': '20130923',

2010

'uploader': 'Amelia Putri Harwita',

2011

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

2012

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

2013

'formats': 'maxcount:10',

2014

},

2015

'params': {

2016

'skip_download': True,

2017

'youtube_include_dash_manifest': False,

2018

},

2019

'skip': 'not actual anymore',

2020

},

2021

{

2022

# YouTube Music auto-generated description

2023

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

2028

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

2029

'upload_date': '20190312',

2030

'uploader': 'Stephen - Topic',

2031

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

2032

'artist': 'Stephen',

2033

'track': 'Voyeur Girl',

2034

'album': 'it\'s too much love to know my dear',

2035

'release_date': '20190313',

2036

'release_year': 2019,

2037

'alt_title': 'Voyeur Girl',

2038

'view_count': int,

2039

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

2040

'playable_in_embed': True,

2041

'like_count': int,

2042

'categories': ['Music'],

2043

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

2044

'channel': 'Stephen',

2045

'availability': 'public',

2046

'creator': 'Stephen',

2047

'duration': 169,

2048

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

2049

'age_limit': 0,

2050

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

2051

'tags': 'count:11',

2052

'live_status': 'not_live',

2053

'channel_follower_count': int

2054

},

2055

'params': {

2056

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

2061

'only_matching': True,

2062

},

2063

{

2064

# invalid -> valid video id redirection

2065

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

2070

'description': 'md5:bf577a41da97918e94fa9798d9228825',

2071

'upload_date': '20090125',

2072

'uploader': 'Prochorowka',

2073

'uploader_id': 'Prochorowka',

2074

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

2075

'artist': 'Panjabi MC',

2076

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

2077

'album': 'Beware of the Boys (Mundian To Bach Ke)',

2078

},

2079

'params': {

2080

'skip_download': True,

2081

},

2082

'skip': 'Video unavailable',

2083

},

2084

{

2085

# empty description results in an empty string

2086

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

2093

'uploader_id': 'ElevageOrVert',

2094

'uploader': 'ElevageOrVert',

2095

'view_count': int,

2096

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

2097

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

2098

'like_count': int,

2099

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

2100

'tags': [],

2101

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

2102

'availability': 'public',

2103

'age_limit': 0,

2104

'categories': ['Pets & Animals'],

2105

'duration': 7,

2106

'playable_in_embed': True,

2107

'live_status': 'not_live',

2108

'channel': 'ElevageOrVert',

2109

'channel_follower_count': int

2110

},

2111

'params': {

2112

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

2117

# see [2] for an example with '};' inside ytInitialPlayerResponse

2118

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

2119

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

2120

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

2125

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

2126

'upload_date': '20130831',

2127

'uploader_id': 'kudvenkat',

2128

'uploader': 'kudvenkat',

2129

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

2130

'like_count': int,

2131

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

2132

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

2133

'live_status': 'not_live',

2134

'categories': ['Education'],

2135

'availability': 'public',

2136

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

2137

'tags': 'count:12',

2138

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

2143

'comment_count': int,

2144

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

2153

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

2154

'only_matching': True,

2155

},

2156

{

2157

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

2158

'only_matching': True,

2159

},

2160

{

2161

# https://github.com/ytdl-org/youtube-dl/pull/28094

2162

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

2168

'upload_date': '20141120',

2169

'uploader': 'The Cinematic Orchestra - Topic',

2170

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2171

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2172

'artist': 'The Cinematic Orchestra',

2173

'track': 'Burn Out',

2174

'album': 'Every Day',

2175

'like_count': int,

2176

'live_status': 'not_live',

2177

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2182

'creator': 'The Cinematic Orchestra',

2183

'channel': 'The Cinematic Orchestra',

2184

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

2185

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2186

'availability': 'public',

2187

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

2188

'categories': ['Music'],

2189

'playable_in_embed': True,

2190

'channel_follower_count': int

2191

},

2192

'params': {

2193

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

2198

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

2199

'only_matching': True,

2200

},

2201

{

2202

# controversial video, requires bpctr/contentCheckOk

2203

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

2208

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

2209

'uploader': 'CBS Mornings',

2210

'uploader_id': 'CBSThisMorning',

2211

'upload_date': '20140716',

2212

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

2213

'duration': 170,

2214

'categories': ['News & Politics'],

2215

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2216

'view_count': int,

2217

'channel': 'CBS Mornings',

2218

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2219

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2220

'age_limit': 18,

2221

'availability': 'needs_auth',

2222

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2223

'like_count': int,

2224

'live_status': 'not_live',

2225

'playable_in_embed': True,

2226

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2231

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2236

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2237

'upload_date': '20201120',

2238

'uploader': 'Walk around Japan',

2239

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2240

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2241

'duration': 1456,

2242

'categories': ['Travel & Events'],

2243

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2244

'view_count': int,

2245

'channel': 'Walk around Japan',

2246

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2247

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2248

'age_limit': 0,

2249

'availability': 'public',

2250

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2251

'live_status': 'not_live',

2252

'playable_in_embed': True,

2253

'channel_follower_count': int

2254

},

2255

'params': {

2256

'skip_download': True,

2257

},

2258

}, {

2259

# Has multiple audio streams

2260

'url': 'WaOKSUlf4TM',

2261

'only_matching': True

2262

}, {

2263

# Requires Premium: has format 141 when requested using YTM url

2264

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2265

'only_matching': True

2266

}, {

2267

# multiple subtitles with same lang_code

2268

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2269

'only_matching': True,

2270

}, {

2271

# Force use of the android client fallback

2272

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2273

'info_dict': {

2274

'id': 'YOelRv7fMxY',

2275

'title': 'DIGGING A SECRET TUNNEL Part 1',

2276

'ext': '3gp',

2277

'upload_date': '20210624',

2278

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2279

'uploader': 'colinfurze',

2280

'uploader_id': 'colinfurze',

2281

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2282

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2283

'duration': 596,

2284

'categories': ['Entertainment'],

2285

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2286

'view_count': int,

2287

'channel': 'colinfurze',

2288

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2289

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2290

'age_limit': 0,

2291

'availability': 'public',

2292

'like_count': int,

2293

'live_status': 'not_live',

2294

'playable_in_embed': True,

2295

'channel_follower_count': int,

'chapters': list,

},

'params': {

'format': '17', # 3gp format available on android

2300

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2305

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2306

'only_matching': True,

2307

'params': {

2308

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2313

'only_matching': True,

2314

}, {

2315

'note': 'Storyboards',

2316

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2322

'uploader_id': 'scishow',

2323

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2324

'upload_date': '20140324',

2325

'uploader': 'SciShow',

2326

'like_count': int,

2327

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2328

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2329

'view_count': int,

2330

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2331

'playable_in_embed': True,

2332

'tags': 'count:12',

2333

'uploader_url': 'http://www.youtube.com/user/scishow',

2334

'availability': 'public',

2335

'channel': 'SciShow',

2336

'live_status': 'not_live',

2337

'duration': 248,

2338

'categories': ['Education'],

2339

'age_limit': 0,

2340

'channel_follower_count': int,

2341

'chapters': list,

2342

}, 'params': {'format': 'mhtml', 'skip_download': True}

2343

}, {

2344

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2345

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2350

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2351

'uploader': 'Leon Nguyen',

2352

'uploader_id': 'VNSXIII',

2353

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2354

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2355

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2360

'tags': 'count:23',

2361

'playable_in_embed': True,

2362

'live_status': 'not_live',

2363

'upload_date': '20220103',

2364

'like_count': int,

2365

'availability': 'public',

2366

'channel': 'Leon Nguyen',

2367

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2368

'comment_count': int,

2369

'channel_follower_count': int

2370

}

2371

}, {

2372

# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date

2373

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2378

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2379

'uploader': 'Leon Nguyen',

2380

'uploader_id': 'VNSXIII',

2381

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2382

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2383

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2388

'tags': 'count:23',

2389

'playable_in_embed': True,

2390

'live_status': 'not_live',

2391

'upload_date': '20220102',

2392

'like_count': int,

2393

'availability': 'public',

2394

'channel': 'Leon Nguyen',

2395

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2396

'comment_count': int,

2397

'channel_follower_count': int

2398

},

2399

'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}

2400

}, {

2401

# Date text is for a premiered video; ensure upload date is in UTC (published 1641172509)

2402

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2407

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2408

'uploader': 'Quackity',

2409

'uploader_id': 'QuackityHQ',

2410

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2411

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2412

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2417

'tags': 'count:26',

2418

'playable_in_embed': True,

2419

'live_status': 'not_live',

2420

'release_timestamp': 1641172509,

2421

'release_date': '20220103',

2422

'upload_date': '20220103',

2423

'like_count': int,

2424

'availability': 'public',

2425

'channel': 'Quackity',

2426

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2427

'channel_follower_count': int

2428

}

2429

},

2430

{ # continuous livestream. Microformat upload date should be preferred.

2431

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2432

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2433

'info_dict': {

2434

'id': 'kgx4WGK0oNU',

2435

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2436

'ext': 'mp4',

2437

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2438

'availability': 'public',

2439

'age_limit': 0,

2440

'release_timestamp': 1637975704,

2441

'upload_date': '20210619',

2442

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2443

'live_status': 'is_live',

2444

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2445

'uploader': '阿鲍Abao',

2446

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2447

'channel': 'Abao in Tokyo',

2448

'channel_follower_count': int,

2449

'release_date': '20211127',

2450

'tags': 'count:39',

2451

'categories': ['People & Blogs'],

2452

'like_count': int,

2453

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2454

'view_count': int,

2455

'playable_in_embed': True,

2456

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2457

'concurrent_view_count': int,

2458

},

2459

'params': {'skip_download': True}

2460

}, {

2461

# Story. Requires specific player params to work.

2462

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2467

'view_count': int,

2468

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2469

'upload_date': '20220526',

2470

'categories': ['Education'],

2471

'title': 'Story',

2472

'channel': 'IT\'S HISTORY',

2473

'description': '',

2474

'uploader_id': 'BlastfromthePast',

2475

'duration': 12,

2476

'uploader': 'IT\'S HISTORY',

2477

'playable_in_embed': True,

2478

'age_limit': 0,

2479

'live_status': 'not_live',

2480

'tags': [],

2481

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2482

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2483

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2484

},

2485

'skip': 'stories get removed after some period of time',

2486

}, {

2487

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2492

'upload_date': '20220323',

2493

'like_count': int,

2494

'availability': 'unlisted',

2495

'channel': 'nao20010128nao',

2496

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2497

'age_limit': 0,

2498

'uploader': 'nao20010128nao',

2499

'uploader_id': 'nao20010128nao',

2500

'categories': ['Music'],

2501

'view_count': int,

2502

'description': '',

2503

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2504

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2505

'live_status': 'not_live',

2506

'playable_in_embed': True,

2507

'channel_follower_count': int,

2508

'duration': 6,

2509

'tags': [],

2510

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

2511

}

2512

}, {

2513

# Prefer primary title+description language metadata by default

2514

# Do not prefer translated description if primary is empty

2515

'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',

'info_dict': {

'id': 'el3E4MbxRqQ',

'ext': 'mp4',

'title': 'dlp test video 2 - primary sv no desc',

2520

'description': '',

2521

'channel': 'cole-dlp-test-acc',

2522

'tags': [],

2523

'view_count': int,

2524

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2525

'like_count': int,

2526

'playable_in_embed': True,

2527

'availability': 'unlisted',

2528

'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',

2529

'age_limit': 0,

2530

'duration': 5,

2531

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2532

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2533

'live_status': 'not_live',

2534

'upload_date': '20220908',

2535

'categories': ['People & Blogs'],

2536

'uploader': 'cole-dlp-test-acc',

2537

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2538

},

2539

'params': {'skip_download': True}

2540

}, {

2541

# Extractor argument: prefer translated title+description

2542

'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',

'info_dict': {

'id': 'gHKT4uU8Zng',

'ext': 'mp4',

'channel': 'cole-dlp-test-acc',

2547

'tags': [],

2548

'duration': 5,

2549

'live_status': 'not_live',

2550

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2551

'upload_date': '20220728',

2552

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2553

'view_count': int,

2554

'categories': ['People & Blogs'],

2555

'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',

2556

'title': 'dlp test video title translated (fr)',

2557

'availability': 'public',

2558

'uploader': 'cole-dlp-test-acc',

2559

'age_limit': 0,

2560

'description': 'dlp test video description translated (fr)',

2561

'playable_in_embed': True,

2562

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2563

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2564

},

2565

'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},

2566

'expected_warnings': [r'Preferring "fr" translated fields'],

2567

}, {

2568

'note': '6 channel audio',

2569

'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',

2570

'only_matching': True,

2571

}, {

2572

'note': 'Multiple HLS formats with same itag',

2573

'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',

'info_dict': {

'id': 'kX3nB4PpJko',

'ext': 'mp4',

'categories': ['Entertainment'],

2578

'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',

2579

'uploader_url': 'http://www.youtube.com/user/MrBeast6000',

2580

'live_status': 'not_live',

2581

'duration': 937,

2582

'channel_follower_count': int,

2583

'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',

2584

'title': 'Last To Take Hand Off Jet, Keeps It!',

2585

'channel': 'MrBeast',

2586

'playable_in_embed': True,

2587

'view_count': int,

2588

'upload_date': '20221112',

2589

'uploader': 'MrBeast',

2590

'uploader_id': 'MrBeast6000',

2591

'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',

2592

'age_limit': 0,

2593

'availability': 'public',

2594

'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',

'like_count': int,

'tags': [],

},

'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},

2599

}, {

2600

'note': 'Audio formats with Dynamic Range Compression',

2601

'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',

'info_dict': {

'id': 'Tq92D6wQ1mg',

'ext': 'weba',

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

2606

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

2607

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

2608

'channel_follower_count': int,

2609

'description': 'md5:17eccca93a786d51bc67646756894066',

2610

'upload_date': '20191228',

2611

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

2612

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

2613

'playable_in_embed': True,

2614

'like_count': int,

2615

'categories': ['Entertainment'],

2616

'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',

2617

'age_limit': 18,

2618

'channel': 'Projekt Melody',

2619

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

2620

'view_count': int,

2621

'availability': 'needs_auth',

2622

'comment_count': int,

2623

'live_status': 'not_live',

2624

'uploader': 'Projekt Melody',

2625

'duration': 106,

2626

},

2627

'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},

2628

},

2629

{

2630

'url': 'https://www.youtube.com/live/qVv6vCqciTM',

'info_dict': {

'id': 'qVv6vCqciTM',

'ext': 'mp4',

'age_limit': 0,

'uploader_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',

2636

'comment_count': int,

2637

'chapters': 'count:13',

2638

'upload_date': '20221223',

2639

'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',

2640

'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',

2641

'uploader_url': 'http://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',

2642

'like_count': int,

2643

'release_date': '20221223',

2644

'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],

2645

'title': '【 #インターネット女クリスマス】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',

2646

'view_count': int,

2647

'playable_in_embed': True,

2648

'duration': 4438,

2649

'availability': 'public',

2650

'channel_follower_count': int,

2651

'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',

2652

'categories': ['Entertainment'],

2653

'live_status': 'was_live',

2654

'release_timestamp': 1671793345,

2655

'channel': 'さなちゃんねる',

2656

'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',

2657

'uploader': 'さなちゃんねる',

},

},

]

_WEBPAGE_TESTS = [

# YouTube <object> embed

2664

{

2665

'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',

2666

'md5': '873c81d308b979f0e23ee7e620b312a3',

'info_dict': {

'id': 'msN87y-iEx0',

'ext': 'mp4',

'title': 'Feynman: Mirrors FUN TO IMAGINE 6',

2671

'upload_date': '20080526',

2672

'description': 'md5:873c81d308b979f0e23ee7e620b312a3',

2673

'uploader': 'Christopher Sykes',

2674

'uploader_id': 'ChristopherJSykes',

2675

'age_limit': 0,

2676

'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],

2677

'channel_id': 'UCCeo--lls1vna5YJABWAcVA',

2678

'playable_in_embed': True,

2679

'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',

2680

'like_count': int,

2681

'comment_count': int,

2682

'channel': 'Christopher Sykes',

2683

'live_status': 'not_live',

2684

'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',

2685

'availability': 'public',

2686

'duration': 195,

2687

'view_count': int,

2688

'categories': ['Science & Technology'],

2689

'channel_follower_count': int,

2690

'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',

2691

},

2692

'params': {

2693

'skip_download': True,

}

},

]

@classmethod
def suitable(cls, url):
    """Return False for URLs with a non-empty `list` query parameter; otherwise defer to the parent class."""
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super().suitable(url)

2706

2707

def __init__(self, *args, **kwargs):
    """Run the parent initialiser, then create the empty player-code and player-function caches."""
    super().__init__(*args, **kwargs)
    self._code_cache, self._player_cache = {}, {}

2711

2712

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """
    Attach fragment generators to every format flagged `is_from_start`.

    The format dicts are modified in place. While the stream is live each format
    gets a generator factory as `fragments` and the
    `http_dash_segments_generator` protocol; otherwise the fragments are
    wrapped in a LazyList and the `is_from_start` flag is removed.
    """
    manifest_lock = threading.Lock()
    start_time = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download the player responses once more than `delay` seconds have passed since the last fetch"""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'), expected_type=dict)
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            # Only one thread may refresh the shared manifest state at a time
            with manifest_lock:
                refetch_manifest(format_id, delay)

            matched = next((fmt for fmt in formats if fmt['format_id'] == format_id), None)
            if matched:
                return matched['manifest_url'], matched['manifest_stream_number'], is_live
            if is_live:
                retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
            else:
                retry.error = f'{video_id}: Video is no longer live'
        return None

    for fmt in formats:
        fmt['is_live'] = is_live
        fragment_gen = functools.partial(
            self._live_dash_fragments, video_id, fmt['format_id'],
            live_start_time, mpd_feed, not is_live and fmt.copy())
        if is_live:
            fmt['fragments'] = fragment_gen
            fmt['protocol'] = 'http_dash_segments_generator'
            continue
        fmt['fragments'] = LazyList(fragment_gen({}))
        del fmt['is_from_start']

2759

2760

def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):

2761

FETCH_SPAN, MAX_DURATION = 5, 432000

2762

2763

mpd_url, stream_number, is_live = None, None, True

2764

2765

begin_index = 0

2766

download_start_time = ctx.get('start') or time.time()

2767

2768

lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION

2769

if lack_early_segments:

2770

self.report_warning(bug_reports_message(

2771

'Starting download from the last 120 hours of the live stream since '

2772

'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

2773

lack_early_segments = True

2774

2775

known_idx, no_fragment_score, last_segment_url = begin_index, 0, None

2776

fragments, fragment_base_url = None, None

2777

2778

def _extract_sequence_from_mpd(refresh_sequence, immediate):

2779

nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url

2780

# Obtain from MPD's maximum seq value

2781

old_mpd_url = mpd_url

2782

last_error = ctx.pop('last_error', None)

2783

expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403

2784

mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)

2785

or (mpd_url, stream_number, False))

2786

if not refresh_sequence:

2787

if expire_fast and not is_live:

2788

return False, last_seq

2789

elif old_mpd_url == mpd_url:

2790

return True, last_seq

2791

if manifestless_orig_fmt:

2792

fmt_info = manifestless_orig_fmt

2793

else:

2794

try:

2795

fmts, _ = self._extract_mpd_formats_and_subtitles(

2796

mpd_url, None, note=False, errnote=False, fatal=False)

2797

except ExtractorError:

2798

fmts = None

2799

if not fmts:

2800

no_fragment_score += 2

2801

return False, last_seq

2802

fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)

2803

fragments = fmt_info['fragments']

2804

fragment_base_url = fmt_info['fragment_base_url']

2805

assert fragment_base_url

2806

2807

_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))

2808

return True, _last_seq

2809

2810

self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')

2811

while is_live:

2812

fetch_time = time.time()

2813

if no_fragment_score > 30:

2814

return

2815

if last_segment_url:

2816

# Obtain from "X-Head-Seqnum" header value from each segment

2817

try:

2818

urlh = self._request_webpage(

2819

last_segment_url, None, note=False, errnote=False, fatal=False)

2820

except ExtractorError:

2821

urlh = None

2822

last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))

2823

if last_seq is None:

2824

no_fragment_score += 2

2825

last_segment_url = None

2826

continue

2827

else:

2828

should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)

2829

no_fragment_score += 2

2830

if not should_continue:

2831

continue

2832

2833

if known_idx > last_seq:

2834

last_segment_url = None

continue

last_seq += 1

if begin_index < 0 and known_idx < 0:

2840

# skip from the start when it's negative value

2841

known_idx = last_seq + begin_index

2842

if lack_early_segments:

2843

known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))

2844

try:

2845

for idx in range(known_idx, last_seq):

2846

# do not update sequence here or you'll get skipped some part of it

2847

should_continue, _ = _extract_sequence_from_mpd(False, False)

2848

if not should_continue:

2849

known_idx = idx - 1

2850

raise ExtractorError('breaking out of outer loop')

2851

last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)

2852

yield {

2853

'url': last_segment_url,

2854

'fragment_count': last_seq,

2855

}

2856

if known_idx == last_seq:

2857

no_fragment_score += 5

2858

else:

2859

no_fragment_score = 0

2860

known_idx = last_seq

2861

except ExtractorError:

2862

continue

2863

2864

if manifestless_orig_fmt:

2865

# Stop at the first iteration if running for post-live manifestless;

2866

# fragment count no longer increase since it starts

2867

break

2868

2869

time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2870

2871

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfgs, or None when there is none"""
    found = traverse_obj(
        ytcfgs,
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', found) if found else None

2878

2879

def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the player version found in the iframe API script, or return None"""
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2888

2889

def _signature_cache_id(self, example_sig):

2890

""" Return a string representation of a signature """

2891

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2892

2893

@classmethod
def _extract_player_info(cls, player_url):
    """Return the `id` group of the first `_PLAYER_INFO_RE` pattern matching player_url"""
    matches = (re.search(player_re, player_url) for player_re in cls._PLAYER_INFO_RE)
    id_m = next(filter(None, matches), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')

2902

2903

def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS code for player_url, downloading it once per player id"""
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    downloaded = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if downloaded:
        self._code_cache[player_id] = downloaded
    return self._code_cache.get(player_id)

2913

2914

def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a function that reorders the characters of a signature.

    The reordering is stored as a list of source indices in the
    `youtube-sigfuncs` cache section; it is computed from the player JS
    only when the cache has no entry for this player and signature shape.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    player_code = None if cache_spec else self._load_player(video_id, player_url)
    if player_code:
        sig_func = self._parse_sig_js(player_code)
        # Feed a string whose characters are their own positions to recover the index mapping
        probe = ''.join(map(chr, range(len(example_sig))))
        cache_spec = [ord(ch) for ch in sig_func(probe)]
        self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    return lambda s: ''.join(s[i] for i in cache_spec)

2933

2934

def _print_sig_code(self, func, example_sig):

2935

if not self.get_param('youtube_print_sig_code'):

2936

return

2937

2938

def gen_sig_code(idxs):

2939

def _genslice(start, end, step):

2940

starts = '' if start == 0 else str(start)

2941

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2942

steps = '' if step == 1 else (':%d' % step)

2943

return f's[{starts}{ends}{steps}]'

2944

2945

step = None

2946

# Quelch pyflakes warnings - start will be set when step is set

2947

start = '(Never used)'

2948

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2953

step = None

2954

continue

2955

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2965

2966

test_string = ''.join(map(chr, range(len(example_sig))))

2967

cache_res = func(test_string)

2968

cache_spec = [ord(c) for c in cache_res]

2969

expr_code = ' + '.join(gen_sig_code(cache_spec))

2970

signature_id_tuple = '(%s)' % (

2971

', '.join(str(len(p)) for p in example_sig.split('.')))

2972

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2973

' return %s\n') % (signature_id_tuple, expr_code)

2974

self.to_screen('Extracted signature function:\n' + code)

2975

2976

def _parse_sig_js(self, jscode):
    """
    Find the signature function in the player JS and return a Python callable for it.

    The name is taken from the `sig` group of the first pattern that matches;
    the function is then extracted with JSInterpreter. The returned callable
    takes the signature string as its only argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function expects its arguments as a list
    return lambda s: initial_function([s])

2999

3000

def _cached(self, func, *cache_id):

3001

def inner(*args, **kwargs):

3002

if cache_id not in self._player_cache:

3003

try:

3004

self._player_cache[cache_id] = func(*args, **kwargs)

3005

except ExtractorError as e:

3006

self._player_cache[cache_id] = e

3007

except Exception as e:

3008

self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)

3009

3010

ret = self._player_cache[cache_id]

3011

if isinstance(ret, Exception):

raise ret

return ret

return inner

def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    cache_key = ('sig', player_url, self._signature_cache_id(s))
    extract_sig = self._cached(self._extract_signature_function, *cache_key)
    sig_func = extract_sig(video_id, player_url, s)
    self._print_sig_code(sig_func, s)
    return sig_func(s)

3023

3024

def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into a working signature

    Runs the extracted nsig function natively first; if that raises
    JSInterpreter.Exception and PhantomJS is usable, executes the same
    function body through PhantomJS instead.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as err:
        raise ExtractorError('Unable to extract nsig function code', cause=err)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        build_nsig_func = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = build_nsig_func(jsi, func_code)(s)
    except JSInterpreter.Exception as native_error:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # No PhantomJS fallback available: surface the native failure
            raise native_error
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error, only_once=True)

        args, func_body = func_code
        ret = jsi.execute(
            f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
            video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret

3057

3058

def _extract_n_function_name(self, jscode):

3059

funcname, idx = self._search_regex(

3060

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

3061

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

if not idx:

return funcname

return json.loads(js_to_json(self._search_regex(

3066

rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,

3067

f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]

3068

3069

def _extract_n_function_code(self, video_id, player_url):
    """
    Return (jsi, player_id, func_code) for the n-parameter function of a player.

    func_code is taken from the `youtube-nsig` cache section when present.
    Otherwise it is extracted from the player JS, first with a regex and
    then with JSInterpreter, and stored in the cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # func_name is escaped because JS identifiers may contain "$", which is a regex anchor
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
        # NB: The end of the regex is intentionally kept strict
        {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Same shape as the jsinterp result below: (list of argument names, code)
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code

3094

3095

def _extract_n_function_from_code(self, jsi, func_code):
    """Build a callable that runs the nsig function and reports every failure as JSInterpreter.Exception"""
    js_func = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = js_func([s])
        except JSInterpreter.Exception:
            raise
        except Exception as err:
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)

        # A result with this prefix is treated as a failure
        if result.startswith('enhanced_except_'):
            raise JSInterpreter.Exception('Signature function returned an exception')
        return result

    return extract_nsig

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The `STS` value of ytcfg is preferred; otherwise the player JS is searched.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    player_code = self._load_player(video_id, player_url, fatal=fatal)
    if not player_code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_code,
        'JS player signature timestamp', group='sts', fatal=fatal))

3135

3136

def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs of the player responses; stop at the first one missing"""
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        base_url = get_first(
            player_responses, ('playbackTracking', key, 'baseUrl'),
            expected_type=url_or_none)
        if not base_url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(base_url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # more consistent results setting it to right before the end
        length_values = qs.get('len') or ['1.5']
        video_length = [str(float(length_values[0]) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = 0
            qs['et'] = video_length

        tracking_url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            tracking_url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

3176

3177

@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url results for YouTube videos referenced by a third-party webpage"""
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for raw_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for groups in re.findall(importer_re, webpage):
        # findall returns one tuple per match; the video id is the last group
        video_id = groups[-1]
        yield cls.url_result(video_id, cls, video_id)

3200

3201

@classmethod
def extract_id(cls, url):
    """Return the video id obtained from `get_temp_id`, raising ExtractorError when there is none"""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

3207

3208

def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the `chapteredPlayerBarRenderer` of the player overlays"""
    def chapter_start(chapter):
        # timeRangeStartMillis is scaled down by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    return self._extract_chapters_helper(
        chapter_list,
        start_function=chapter_start,
        title_function=chapter_title,
        duration=duration)

3222

3223

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first engagement panel that yields any, or an empty list"""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters_helper(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

3235

3236

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer into a comment dict; returns None when it has no `commentId`"""
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    timestamp = self._parse_time_text(time_text)

    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    vote_getters = (
        lambda x: x['voteCount']['simpleText'],
        lambda x: x['likeCount'],
    )
    like_count = parse_count(try_get(comment_renderer, vote_getters, str)) or 0
    author_thumbnail = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': like_count,
        'is_favorited': 'creatorHeart' in action_buttons,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }

3272

3273

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generate comment dicts by following continuations from root_continuation_data.

    Calls itself for reply threads, passing the comment id as `parent` and
    sharing `tracker`, a dict of running counters and seen comment ids.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        header_continuation = None
        for content in contents:
            header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            header_continuation = (
                self._extract_continuation_ep_data(sort_continuation_ep)
                or self._extract_continuation(sort_menu_item))
            if not header_continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return header_continuation

    def extract_thread(contents):
        # A bare `yield` (None) tells the consumer below to stop extraction entirely
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            # Sometimes YouTube may break and give us infinite looping comments.
            # See: https://github.com/yt-dlp/yt-dlp/issues/6290
            if comment['id'] in tracker['seen_comment_ids']:
                self.report_warning('Detected YouTube comments looping. Stopping comment extraction as we probably cannot get any more.')
                yield
            else:
                tracker['seen_comment_ids'].add(comment['id'])

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if replies_renderer:
                tracker['current_page_thread'] += 1
                reply_entries = self._comment_entries(
                    replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                remaining_replies = max(0, max_replies - tracker['total_reply_comments'])
                yield from itertools.islice(
                    reply_entries, min(max_replies_per_thread, remaining_replies))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0,
            seen_comment_ids=set())

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # The first value (overall maximum) is not used in this method
    _, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
                self.report_warning(
                    'Received incomplete data for a comment reply thread and retrying did not help. '
                    'Ignoring to let other comments be downloaded.')
            else:
                raise
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section is read only for its header (sort menu continuation)
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled

3437

3438

@staticmethod

3439

def _generate_comment_continuation(video_id):

3440

"""

3441

Generates initial comment section continuation token from given video id

3442

"""

3443

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

3444

return base64.b64encode(token.encode()).decode()

3445

3446

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _real_comment_extract(contents):
        section_renderers = traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
        renderer = None
        for item in section_renderers:
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # The first value of the `max_comments` argument caps the total number of comments returned
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, limit)

3456

3457

@staticmethod

3458

def _get_checkok_params():

3459

return {'contentCheckOk': True, 'racyCheckOk': True}

3460

3461

@classmethod

3462

def _generate_player_context(cls, sts=None):

3463

context = {

3464

'html5Preference': 'HTML5_PREF_WANTS',

3465

}

3466

if sts is not None:

3467

context['signatureTimestamp'] = sts

3468

return {

3469

'playbackContext': {

3470

'contentPlaybackContext': context

3471

},

3472

**cls._get_checkok_params()

}

@staticmethod
def _is_agegated(player_response):
    """Tell whether the playability status of a player response indicates an age gate"""
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')))
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False

3486

3487

@staticmethod
def _is_unplayable(player_response):
    """Tell whether `playabilityStatus.status` of the player response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3490

3491

# Sent as `params` in the player API query (for stories and the android client)
# and as `pp` in the watch page query for stories
_STORY_PLAYER_PARAMS = '8AEB'

3492

3493

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Request the player response of a video from the 'player' endpoint for one client

    @param client         Name of the client to use for the request
    @param video_id       ID of the video
    @param master_ytcfg   Fallback ytcfg for session index, sync id and signature timestamp
    @param player_ytcfg   ytcfg of the client, used for the headers and the request
    @param player_url     Player URL; the signature timestamp is only looked up if it is set
    @param initial_pr     Initial player response, used when extracting the account sync id
    @param smuggled_data  Smuggled URL data; `is_story` adds the story player params
    @returns              The response of `_extract_response`, or None if it is falsy
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client, note=note)
    return response or None

3514

3515

def _get_requested_clients(self, url, smuggled_data):
    """
    Work out which innertube clients should be queried

    Names are taken from the `player_client` extractor argument: names of allowed
    clients (those in INNERTUBE_CLIENTS not starting with `_`) are kept as-is,
    'default' expands to android and web, 'all' expands to every allowed client
    sorted by descending priority, and anything else is skipped with a warning.
    If nothing valid was requested, the defaults are used.

    For music URLs, the `<client>_music` variant of each requested client is
    appended when such a client exists in INNERTUBE_CLIENTS.

    @param url            URL being extracted, checked with `is_music_url`
    @param smuggled_data  Smuggled URL data; `is_music_url` forces the music variants
    @returns              The requested client names passed through `orderedSet`
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that later in-place changes never touch the defaults list
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending a list from a generator that
        # iterates over the very same list mutates it during iteration
        music_clients = [
            f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3538

3539

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect the player responses of a video from the given clients

    @param clients        Client names to query; they are processed in the given order
    @param video_id       ID of the video
    @param webpage        Watch page contents, or a falsy value if not available
    @param master_ytcfg   ytcfg used for the 'web' client and when looking up the player URL
    @param smuggled_data  Smuggled URL data, handed on to `_extract_player_response`
    @returns              Tuple of (list of player responses, player URL or None)
    @raises               The last ExtractorError caught if no player response was collected
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    seen_clients = set(clients)
    # Reversed so that pop() hands out the clients in their original order
    pending = clients[::-1]
    responses = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client not in INNERTUBE_CLIENTS or actual_client in seen_clients:
                continue
            pending.append(client_name)
            seen_clients.add(actual_client)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        responses.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client, base_client, variant = _split_innertube_client(pending.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe fallback is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones are demoted to warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                # Save client name for introspection later
                name = short_client_name(client)
                sd = traverse_obj(pr, ('streamingData', {dict})) or {}
                sd[STREAMING_DATA_CLIENT_NAME] = name
                for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
                    f[STREAMING_DATA_CLIENT_NAME] = name
                responses.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not responses:
            raise last_error
        self.report_warning(last_error)
    return responses, player_url

3626

3627

def _needs_live_processing(self, live_status, duration):
    """
    Decide whether the video needs live processing

    @returns  `live_status` if it is 'is_live' and the `live_from_start` param is set,
              or if it is 'post_live' and `duration` exceeds 4 hours; None otherwise
    """
    if live_status == 'is_live':
        wanted = self.get_param('live_from_start')
    elif live_status == 'post_live':
        wanted = (duration or 0) > 4 * 3600
    else:
        wanted = False
    return live_status if wanted else None

3631

3632

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """
    Generate the formats found in the given streaming data, followed by the subtitles

    First the entries under `formats`/`adaptiveFormats` are processed (decrypting
    the signature and the `n` parameter where needed), then the HLS and DASH
    manifests of every streaming data dict, unless they are skipped.

    @param streaming_data  List of `streamingData` dicts of the player responses
    @param video_id        ID of the video
    @param player_url      Player URL used for signature/nsig decryption (may be None)
    @param live_status     Live status of the video
    @param duration        Duration of the video in seconds (may be None)
    @yields                Format dicts; the LAST item yielded is the subtitles dict
    """
    CHUNK_SIZE = 10 << 20
    itags, stream_ids = collections.defaultdict(set), []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
    all_formats = self._configuration_arg('include_duplicate_formats')

    def build_fragments(f):
        """Lazily split the format into ranged requests of at most CHUNK_SIZE bytes"""
        return LazyList({
            'url': update_url_query(f['url'], {
                'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}'
            })
        } for range_start in range(0, f['filesize'], CHUNK_SIZE))

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if not all_formats:
            if stream_id in stream_ids:
                continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `audioQuality` may be missing or explicitly None
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # A missing cipher parses to an empty dict and the format is skipped below
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher') or '')
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                throttled = True

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)

        client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                join_nonempty(audio_track.get('displayName'),
                              language_preference > 0 and ' (default)', delim=''),
                # `quality` is None when the format carries no quality information at all
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                fmt.get('isDrc') and 'DRC',
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED',
                (self.get_param('verbose') or all_formats) and client_name,
                delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else '') or None,
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
        if single_stream and dct.get('ext'):
            dct['container'] = dct['ext'] + '_dash'

        if dct['filesize']:
            # With a known size, offer the format as ranged fragments; the plain
            # https variant is additionally kept only when duplicates are requested
            yield {
                **dct,
                'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
                'protocol': 'http_dash_segments',
                'fragments': build_fragments(dct),
            }
            if not all_formats:
                continue
            dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, client_name, itag):
        """Fill in format_id, quality and note of a manifest format; False means "duplicate, drop it\""""
        key = (proto, f.get('language'))
        if not all_formats and key in itags[itag]:
            return False
        itags[itag].add(key)

        if itag and all_formats:
            f['format_id'] = f'{itag}-{proto}'
        elif any(p != proto for p, _ in itags[itag]):
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality recorded for the closest known height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        if self.get_param('verbose'):
            f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
        return True

    subtitles = {}
    for sd in streaming_data:
        client_name = sd.get(STREAMING_DATA_CLIENT_NAME)

        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', client_name, self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', client_name, f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    yield subtitles

def _extract_storyboard(self, player_responses, duration):
    """
    Generate storyboard formats from the storyboard spec of the player responses

    The spec is a '|'-separated string: its first item is the base URL and every
    other item describes one storyboard as 8 '#'-separated fields, of which the
    first five are width, height, frame count, columns and rows, and the last
    two are the values for `$N` and the `sigh` query parameter.

    @param player_responses  Player responses to search for the spec
    @param duration          Duration of the video in seconds. Nothing is generated
                             when it is None or 0, since the fps and the fragment
                             durations cannot be computed without it
    @yields                  One 'mhtml' format dict per well-formed storyboard
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() removes the first item of the spec (the base URL)
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Dividing by a missing/zero duration below would abort the whole extraction
        self.write_debug('Skipping storyboards since the video duration is unknown')
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a grid of cols x rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3896

3897

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """
    Download the watch page (unless skipped) and the player responses of a video

    @param url            URL being extracted, used to select the clients
    @param smuggled_data  Smuggled URL data; `is_story` adds the story params to the query
    @param video_id       ID of the video
    @param webpage_url    URL of the watch page to download
    @returns              Tuple of (webpage or None, master ytcfg, player responses, player URL)
    """
    webpage = None
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    if not skip_webpage:
        query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(webpage_url, video_id, fatal=False, query=query)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url

3913

3914

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """
    Determine the live status of a video and extract its formats and subtitles

    @returns  Tuple of (live broadcast details, live status, streaming data,
              list of formats, subtitles)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The first matching condition wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The generator yields all the formats first and the subtitles as its last item
    extracted = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)
    *formats, subtitles = extracted

    return live_broadcast_details, live_status, streaming_data, formats, subtitles

3932

3933

def _real_extract(self, url):

3934

url, smuggled_data = unsmuggle_url(url, {})

3935

video_id = self._match_id(url)

3936

3937

base_url = self.http_scheme() + '//www.youtube.com/'

3938

webpage_url = base_url + 'watch?v=' + video_id

3939

3940

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3941

3942

playability_statuses = traverse_obj(

3943

player_responses, (..., 'playabilityStatus'), expected_type=dict)

3944

3945

trailer_video_id = get_first(

3946

playability_statuses,

3947

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3948

expected_type=str)

3949

if trailer_video_id:

3950

return self.url_result(

3951

trailer_video_id, self.ie_key(), trailer_video_id)

3952

3953

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3954

if webpage else (lambda x: None))

3955

3956

video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)

3957

microformats = traverse_obj(

3958

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3959

expected_type=dict)

3960

3961

translated_title = self._get_text(microformats, (..., 'title'))

3962

video_title = (self._preferred_lang and translated_title

3963

or get_first(video_details, 'title') # primary

3964

or translated_title

3965

or search_meta(['og:title', 'twitter:title', 'title']))

3966

translated_description = self._get_text(microformats, (..., 'description'))

3967

original_description = get_first(video_details, 'shortDescription')

3968

video_description = (

3969

self._preferred_lang and translated_description

3970

# If original description is blank, it will be an empty string.

3971

# Do not prefer translated description in this case.

3972

or original_description if original_description is not None else translated_description)

3973

3974

multifeed_metadata_list = get_first(

3975

player_responses,

3976

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3977

expected_type=str)

3978

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3979

if self.get_param('noplaylist'):

3980

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3985

# Unquote should take place before split on comma (,) since textual

3986

# fields may contain comma as well (see

3987

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3988

feed_data = urllib.parse.parse_qs(

3989

urllib.parse.unquote_plus(feed))

3990

3991

def feed_entry(name):

3992

return try_get(

3993

feed_data, lambda x: x[name][0], str)

3994

3995

feed_id = feed_entry('id')

3996

if not feed_id:

3997

continue

3998

feed_title = feed_entry('title')

3999

title = video_title

4000

if feed_title:

4001

title += ' (%s)' % feed_title

4002

entries.append({

4003

'_type': 'url_transparent',

4004

'ie_key': 'Youtube',

4005

'url': smuggle_url(

4006

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

4007

{'force_singlefeed': True}),

4008

'title': title,

4009

})

4010

feed_ids.append(feed_id)

4011

self.to_screen(

4012

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

4013

% (', '.join(feed_ids), video_id))

4014

return self.playlist_result(

4015

entries, video_id, video_title, video_description)

4016

4017

duration = (int_or_none(get_first(video_details, 'lengthSeconds'))

4018

or int_or_none(get_first(microformats, 'lengthSeconds'))

4019

or parse_duration(search_meta('duration')) or None)

4020

4021

live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \

4022

self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

4023

if live_status == 'post_live':

4024

self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

4025

4026

if not formats:

4027

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

4028

self.report_drm(video_id)

4029

pemr = get_first(

4030

playability_statuses,

4031

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

4032

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

4033

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

4034

if subreason:

4035

if subreason == 'The uploader has not made this video available in your country.':

4036

countries = get_first(microformats, 'availableCountries')

4037

if not countries:

4038

regions_allowed = search_meta('regionsAllowed')

4039

countries = regions_allowed.split(',') if regions_allowed else None

4040

self.raise_geo_restricted(subreason, countries, metadata_available=True)

4041

reason += f'. {subreason}'

4042

if reason:

4043

self.raise_no_formats(reason, expected=True)

4044

4045

keywords = get_first(video_details, 'keywords', expected_type=list) or []

4046

if not keywords and webpage:

4047

keywords = [

4048

unescapeHTML(m.group('content'))

4049

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

4050

for keyword in keywords:

4051

if keyword.startswith('yt:stretch='):

4052

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

4053

if mobj:

4054

# NB: float is intentional for forcing float division

4055

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

4060

f['stretched_ratio'] = ratio

4061

break

4062

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

4063

thumbnail_url = search_meta(['og:image', 'twitter:image'])

4064

if thumbnail_url:

4065

thumbnails.append({

4066

'url': thumbnail_url,

4067

})

4068

original_thumbnails = thumbnails.copy()

4069

4070

# The best resolution thumbnails sometimes does not appear in the webpage

4071

# See: https://github.com/yt-dlp/yt-dlp/issues/340

4072

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

4073

thumbnail_names = [

4074

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

4075

# in resolution, these are not the custom thumbnail. So de-prioritize them

4076

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

4077

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

4078

]

4079

n_thumbnail_names = len(thumbnail_names)

4080

thumbnails.extend({

4081

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

4082

video_id=video_id, name=name, ext=ext,

4083

webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),

4084

} for name in thumbnail_names for ext in ('webp', 'jpg'))

4085

for thumb in thumbnails:

4086

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

4087

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

4088

self._remove_duplicate_formats(thumbnails)

4089

self._downloader._sort_thumbnails(original_thumbnails)

4090

4091

category = get_first(microformats, 'category') or search_meta('genre')

4092

channel_id = str_or_none(

4093

get_first(video_details, 'channelId')

4094

or get_first(microformats, 'externalChannelId')

4095

or search_meta('channelId'))

4096

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

4097

4098

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

4099

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

4100

if not duration and live_end_time and live_start_time:

4101

duration = live_end_time - live_start_time

4102

4103

needs_live_processing = self._needs_live_processing(live_status, duration)

4104

4105

def is_bad_format(fmt):

4106

if needs_live_processing and not fmt.get('is_from_start'):

4107

return True

4108

elif (live_status == 'is_live' and needs_live_processing != 'is_live'

4109

and fmt.get('protocol') == 'http_dash_segments'):

4110

return True

4111

4112

for fmt in filter(is_bad_format, formats):

4113

fmt['preference'] = (fmt.get('preference') or -1) - 10

4114

fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')

4115

4116

if needs_live_processing:

4117

self._prepare_live_from_start_formats(

4118

formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

4119

4120

formats.extend(self._extract_storyboard(player_responses, duration))

info = {

'id': video_id,

'title': video_title,

4125

'formats': formats,

4126

'thumbnails': thumbnails,

4127

# The best thumbnail that we are sure exists. Prevents unnecessary

4128

# URL checking if user don't care about getting the best possible thumbnail

4129

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

4130

'description': video_description,

4131

'uploader': get_first(video_details, 'author'),

4132

'uploader_id': self._search_regex(r'/(?:channel/|user/|(?=@))([^/?&#]+)', owner_profile_url, 'uploader id', default=None),

4133

'uploader_url': owner_profile_url,

4134

'channel_id': channel_id,

4135

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

4136

'duration': duration,

4137

'view_count': int_or_none(

4138

get_first((video_details, microformats), (..., 'viewCount'))

4139

or search_meta('interactionCount')),

4140

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

4141

'age_limit': 18 if (

4142

get_first(microformats, 'isFamilySafe') is False

4143

or search_meta('isFamilyFriendly') == 'false'

4144

or search_meta('og:restrictions:age') == '18+') else 0,

4145

'webpage_url': webpage_url,

4146

'categories': [category] if category else None,

4147

'tags': keywords,

4148

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

4149

'live_status': live_status,

4150

'release_timestamp': live_start_time,

4151

'_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats

4152

'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')

}

subtitles = {}

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

4157

if pctr:

4158

def get_lang_code(track):

4159

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

4160

or track.get('languageCode'))

4161

4162

# Converted into dicts to remove duplicates

4163

captions = {

4164

get_lang_code(sub): sub

4165

for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}

4166

translation_languages = {

4167

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

4168

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}

4169

4170

def process_language(container, base_url, lang_code, sub_name, query):

4171

lang_subs = container.setdefault(lang_code, [])

4172

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

# NB: Constructing the full subtitle dictionary is slow

4183

get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (

4184

self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

4185

for lang_code, caption_track in captions.items():

4186

base_url = caption_track.get('baseUrl')

4187

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

4188

if not base_url:

4189

continue

4190

lang_name = self._get_text(caption_track, 'name', max_runs=1)

4191

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

4196

if not caption_track.get('isTranslatable'):

4197

continue

4198

for trans_code, trans_name in translation_languages.items():

4199

if not trans_code:

4200

continue

4201

orig_trans_code = trans_code

4202

if caption_track.get('kind') != 'asr' and trans_code != 'und':

4203

if not get_translated_subs:

4204

continue

4205

trans_code += f'-{lang_code}'

4206

trans_name += format_field(lang_name, None, ' from %s')

4207

# Add an "-orig" label to the original language so that it can be distinguished.

4208

# The subs are returned without "-orig" as well for compatibility

4209

if lang_code == f'a-{orig_trans_code}':

4210

process_language(

4211

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

4212

# Setting tlang=lang returns damaged subtitles.

4213

process_language(automatic_captions, base_url, trans_code, trans_name,

4214

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

4215

4216

info['automatic_captions'] = automatic_captions

4217

info['subtitles'] = subtitles

4218

4219

parsed_url = urllib.parse.urlparse(url)

4220

for component in [parsed_url.fragment, parsed_url.query]:

4221

query = urllib.parse.parse_qs(component)

4222

for k, v in query.items():

4223

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

4224

d_k += '_time'

4225

if d_k not in info and k in s_ks:

4226

info[d_k] = parse_duration(query[k][0])

4227

4228

# Youtube Music Auto-generated description

4229

if video_description:

4230

mobj = re.search(

4231

r'''(?xs)

4232

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

4233

(?P<album>[^\n]+)

4234

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

4235

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

4236

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

4237

.+\nAuto-generated\ by\ YouTube\.\s*$

4238

''', video_description)

4239

if mobj:

4240

release_year = mobj.group('release_year')

4241

release_date = mobj.group('release_date')

4242

if release_date:

4243

release_date = release_date.replace('-', '')

4244

if not release_year:

4245

release_year = release_date[:4]

4246

info.update({

4247

'album': mobj.group('album'.strip()),

4248

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

4249

'track': mobj.group('track').strip(),

4250

'release_date': release_date,

4251

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

4257

if not traverse_obj(initial_data, 'contents'):

4258

self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')

4259

initial_data = None

4260

if not initial_data:

4261

query = {'videoId': video_id}

4262

query.update(self._get_checkok_params())

4263

initial_data = self._extract_response(

4264

item_id=video_id, ep='next', fatal=False,

4265

ytcfg=master_ytcfg, query=query, check_get_keys='contents',

4266

headers=self.generate_api_headers(ytcfg=master_ytcfg),

4267

note='Downloading initial data API JSON')

4268

4269

info['comment_count'] = traverse_obj(initial_data, (

4270

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

4271

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount'

4272

), (

4273

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

4274

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo'

4275

), expected_type=self._get_count, get_all=False)

4276

4277

try: # This will error if there is no livechat

4278

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

4279

except (KeyError, IndexError, TypeError):

4280

pass

4281

else:

4282

info.setdefault('subtitles', {})['live_chat'] = [{

4283

# url is needed to set cookies

4284

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

4285

'video_id': video_id,

4286

'ext': 'json',

4287

'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')

4288

else 'youtube_live_chat_replay'),

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

4294

or self._extract_chapters_from_engagement_panel(initial_data, duration)

4295

or self._extract_chapters_from_description(video_description, duration)

4296

or None)

4297

4298

contents = traverse_obj(

4299

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

4300

expected_type=list, default=[])

4301

4302

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

4303

if vpir:

4304

stl = vpir.get('superTitleLink')

4305

if stl:

4306

stl = self._get_text(stl)

4307

if try_get(

4308

vpir,

4309

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

4310

info['location'] = stl

4311

else:

4312

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

4313

if mobj:

4314

info.update({

4315

'series': mobj.group(1),

4316

'season_number': int(mobj.group(2)),

4317

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

list) or []):

tbrs = variadic(

traverse_obj(

tlb, ('toggleButtonRenderer', ...),

4326

('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))

4327

for tbr in tbrs:

4328

for getter, regex in [(

4329

lambda x: x['defaultText']['accessibility']['accessibilityData'],

4330

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

4331

lambda x: x['accessibility'],

4332

lambda x: x['accessibilityData']['accessibilityData'],

4333

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

4334

label = (try_get(tbr, getter, dict) or {}).get('label')

4335

if label:

4336

mobj = re.match(regex, label)

4337

if mobj:

4338

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

4339

break

4340

sbr_tooltip = try_get(

4341

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

4342

if sbr_tooltip:

4343

like_count, dislike_count = sbr_tooltip.split(' / ')

4344

info.update({

4345

'like_count': str_to_int(like_count),

4346

'dislike_count': str_to_int(dislike_count),

4347

})

4348

vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))

4349

if vcr:

4350

vc = self._get_count(vcr, 'viewCount')

4351

# Upcoming premieres with waiting count are treated as live here

4352

if vcr.get('isLive'):

4353

info['concurrent_view_count'] = vc

4354

elif info.get('view_count') is None:

4355

info['view_count'] = vc

4356

4357

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

4358

if vsir:

4359

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

4360

info.update({

4361

'channel': self._get_text(vor, 'title'),

4362

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

4367

list) or []

4368

multiple_songs = False

4369

for row in rows:

4370

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

4371

multiple_songs = True

4372

break

4373

for row in rows:

4374

mrr = row.get('metadataRowRenderer') or {}

4375

mrr_title = mrr.get('title')

4376

if not mrr_title:

4377

continue

4378

mrr_title = self._get_text(mrr, 'title')

4379

mrr_contents_text = self._get_text(mrr, ('contents', 0))

4380

if mrr_title == 'License':

4381

info['license'] = mrr_contents_text

4382

elif not multiple_songs:

4383

if mrr_title == 'Album':

4384

info['album'] = mrr_contents_text

4385

elif mrr_title == 'Artist':

4386

info['artist'] = mrr_contents_text

4387

elif mrr_title == 'Song':

4388

info['track'] = mrr_contents_text

4389

4390

fallbacks = {

4391

'channel': 'uploader',

4392

'channel_id': 'uploader_id',

4393

'channel_url': 'uploader_url',

4394

}

4395

4396

# The upload date for scheduled, live and past live streams / premieres in microformats

4397

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

4398

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

4399

upload_date = (

4400

unified_strdate(get_first(microformats, 'uploadDate'))

4401

or unified_strdate(search_meta('uploadDate')))

4402

if not upload_date or (

4403

live_status in ('not_live', None)

4404

and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])

4405

):

4406

upload_date = strftime_or_none(

4407

self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date

4408

info['upload_date'] = upload_date

4409

4410

for to, frm in fallbacks.items():

4411

if not info.get(to):

4412

info[to] = info.get(frm)

4413

4414

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))

4420

4421

is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)

4422

or get_first(video_details, 'isPrivate', expected_type=bool))

4423

4424

info['availability'] = (

4425

'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)

4426

else self._availability(

4427

is_private=is_private,

4428

needs_premium=(

4429

self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)

4430

or False if initial_data and is_private is not None else None),

4431

needs_subscription=(

4432

self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)

4433

or False if initial_data and is_private is not None else None),

4434

needs_auth=info['age_limit'] >= 18,

4435

is_unlisted=None if is_private is None else (

4436

self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)

4437

or get_first(microformats, 'isUnlisted', expected_type=bool))))

4438

4439

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

4440

4441

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

4447

@staticmethod
def passthrough_smuggled_data(func):
    """Decorator forwarding smuggled URL data to the results of ``func``.

    The decorated method is invoked as ``func(self, url, smuggled_data)``
    with the data unsmuggled from ``url``.  Afterwards the (non-empty)
    smuggled data is attached again to the returned result and to each of
    its entries, provided they are ``url``/``url_transparent`` results.
    """
    forwardable_types = ('url', 'url_transparent')
    youtube_hosts = ('www.youtube.com', 'music.youtube.com')

    def _smuggle(info, smuggled_data):
        # Other result types have no URL that will be extracted again
        if info.get('_type') not in forwardable_types:
            return info
        if smuggled_data.get('is_music_url'):
            url_parts = urllib.parse.urlparse(info['url'])
            if url_parts.netloc in youtube_hosts:
                # Rewrite the host to the music domain and drop the flag from the payload
                smuggled_data.pop('is_music_url')
                info['url'] = urllib.parse.urlunparse(url_parts._replace(netloc='music.youtube.com'))
        if smuggled_data:
            info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = func(self, url, smuggled_data)
        if not smuggled_data:
            return info_dict

        _smuggle(info_dict, smuggled_data)
        if info_dict.get('entries'):
            # Every entry gets its own copy since _smuggle() may pop keys from it
            info_dict['entries'] = (
                _smuggle(entry, smuggled_data.copy()) for entry in info_dict['entries'])
        return info_dict

    return wrapper

@staticmethod

def _extract_basic_item_renderer(item):

4476

# Modified from _extract_grid_item_renderer

4477

known_basic_renderers = (

4478

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

4479

)

4480

for key, renderer in item.items():

4481

if not isinstance(renderer, dict):

4482

continue

4483

elif key in known_basic_renderers:

4484

return renderer

4485

elif key.startswith('grid') and key.endswith('Renderer'):

4486

return renderer

4487

4488

def _extract_channel_renderer(self, renderer):
    """Build a ``url`` result for the channel described by ``renderer``.

    Raises KeyError when ``renderer`` has no ``channelId``.
    """
    channel_id = renderer['channelId']
    channel_title = self._get_text(renderer, 'title')
    channel_page = f'https://www.youtube.com/channel/{channel_id}'

    result = {
        '_type': 'url',
        'url': channel_page,
        'id': channel_id,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': channel_title,
        'channel_id': channel_id,
        'channel_url': channel_page,
        'title': channel_title,
    }
    # Optional metadata read from the renderer
    result['channel_follower_count'] = self._get_count(renderer, 'subscriberCountText')
    result['thumbnails'] = self._extract_thumbnails(renderer, 'thumbnail')
    result['playlist_count'] = self._get_count(renderer, 'videoCountText')
    result['description'] = self._get_text(renderer, 'descriptionSnippet')
    return result

4506

4507

def _grid_entries(self, grid_renderer):
    """Yield a result for every recognised item in ``grid_renderer['items']``.

    Each item is resolved, in order of preference, as a playlist, a video,
    a channel, or a generic endpoint URL handled by one of the YouTube
    extractors.  Items matching none of these are skipped.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self._extract_channel_renderer(renderer)
        else:
            # generic endpoint URL support
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str))
            if not endpoint_url:
                continue
            # Only the first extractor accepting the URL is used
            matched_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(endpoint_url)),
                None)
            if matched_ie is not None:
                yield self.url_result(
                    endpoint_url, ie=matched_ie.ie_key(),
                    video_id=matched_ie._match_id(endpoint_url), video_title=title)

4544

4545

def _music_reponsive_list_entry(self, renderer):
    """Return a YouTube Music url result for ``renderer``, or None.

    Tried in order: the item's own video, the playlist of its watch endpoint
    (together with that endpoint's video when present), and its browse endpoint.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={item_video_id}',
            ie=YoutubeIE.ie_key(), video_id=item_video_id)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, (*watch_endpoint, 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, (*watch_endpoint, 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None

4562

4563

def _shelf_entries_from_content(self, shelf_renderer):

4564

content = shelf_renderer.get('content')

4565

if not isinstance(content, dict):

4566

return

4567

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

4568

if renderer:

4569

# TODO: add support for nested playlists so each shelf is processed

4570

# as separate playlist

4571

# TODO: this includes only first N items

4572

yield from self._grid_entries(renderer)

4573

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a url result for the shelf's endpoint, then its inline entries.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)

    # Skipping links to other channels. Note that checking for
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return
    if shelf_url:
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))

    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

4593

4594

def _playlist_entries(self, video_list_renderer):

4595

for content in video_list_renderer['contents']:

4596

if not isinstance(content, dict):

4597

continue

4598

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

4599

if not isinstance(renderer, dict):

4600

continue

4601

video_id = renderer.get('videoId')

4602

if not video_id:

4603

continue

4604

yield self._extract_video(renderer)

4605

4606

def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in a rich item's ``content``, if it has a ``videoId``."""
    renderer_path = ('content', ('videoRenderer', 'reelItemRenderer'))
    renderer = traverse_obj(rich_grid_renderer, renderer_path, get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)

4613

4614

def _video_entry(self, video_renderer):

4615

video_id = video_renderer.get('videoId')

4616

if video_id:

4617

return self._extract_video(video_renderer)

4618

4619

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url result for the page a hashtag tile links to, or None."""
    tap_target = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_target)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4625

4626

def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a backstage post.

    Covers the post's video attachment, its playlist attachment and any
    YouTube video links inside the post text.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        # A link to the attached video would only repeat the entry above
        if linked_video_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)

4661

4662

def _post_thread_continuation_entries(self, post_thread_continuation):

4663

contents = post_thread_continuation.get('contents')

4664

if not isinstance(contents, list):

4665

return

4666

for content in contents:

4667

renderer = content.get('backstagePostThreadRenderer')

4668

if isinstance(renderer, dict):

4669

yield from self._post_thread_entries(renderer)

4670

continue

4671

renderer = content.get('videoRenderer')

4672

if isinstance(renderer, dict):

4673

yield self._video_entry(renderer)

4674

4675

r''' # unused

4676

def _rich_grid_entries(self, contents):

4677

for content in contents:

4678

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4679

if video_renderer:

4680

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _report_history_entries(self, renderer):
    """Yield a url result for every linked text run in a report history table."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for link in traverse_obj(renderer, link_path):
        yield self.url_result(urljoin('https://www.youtube.com', link), YoutubeIE)

4691

4692

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    ``continuation_list`` is an output parameter: it is reset to ``[None]``
    and its single element is set to the continuation extracted while
    walking the renderers.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Renderer key -> callable returning an iterable of entries (falsy entries are dropped)
    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)

        if not section:
            if content.get('richItemRenderer'):
                yield from self._rich_entries(content['richItemRenderer'])
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            for key, renderer in section_item.items():
                handler = section_handlers.get(key)
                if handler is None:
                    continue
                for entry in handler(renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of an item is processed
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4744

4745

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of ``tab``, following continuations page by page.

    Paging stops when there is no continuation, when a page request returns
    nothing, or when a page contains no renderer known to this method.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # continuation_list is looked up at call time, so the rebinding
        # done for every page below is seen here
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Key of the first continuation item -> (handler, key to wrap the items under or None)
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }

    page_num = 0
    while continuation:
        page_num += 1
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation,
            headers=self.generate_api_headers(
                ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data),
            ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        first_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in first_item:
            if key not in known_renderers:
                continue
            handler, parent_key = known_renderers[key]
            if parent_key:
                video_items_renderer = {parent_key: continuation_items}
            else:
                video_items_renderer = continuation_items
            # Fresh list for this page; extract_entries() fills it when it is the handler
            continuation_list = [None]
            yield from handler(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        if not video_items_renderer:
            break

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4810

for tab_renderer in tabs:

4811

if tab_renderer.get('selected'):

4812

return tab_renderer

4813

if fatal:

4814

raise ExtractorError('Unable to find selected tab')

4815

4816

@staticmethod
def _extract_tab_renderers(response):
    """Collect the (expandable) tab renderer dicts of a two-column browse response."""
    renderer_keys = ('tabRenderer', 'expandableTabRenderer')
    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., renderer_keys)
    return traverse_obj(response, tabs_path, expected_type=dict)

4820

4821

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Return a playlist result for the selected tab of a tabbed page.

    The playlist title is the page title followed by the selected tab's
    ``title`` and ``expandedText`` where present.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)
    selected_tab = self._extract_selected_tab(tabs)

    for tab_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, tab_field, ' - %s')

    entries = self._entries(
        selected_tab, metadata['id'], ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4834

4835

def _extract_metadata_from_tabs(self, item_id, data):
    """Build the playlist-level info dict for a channel or playlist page.

    Reads the channel/playlist metadata renderer, the page header and the
    deprecated sidebar renderers of ``data``.  The returned dict always has
    ``id``, ``title`` and the ``uploader*``/``channel*`` keys, although the
    values may be None.
    """
    info = {'id': item_id}

    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if metadata_renderer:
        channel_id = metadata_renderer.get('externalId')
        info['uploader'] = metadata_renderer.get('title')
        info['uploader_id'] = channel_id
        info['uploader_url'] = metadata_renderer.get('channelUrl')
        if channel_id:
            info['id'] = channel_id
    else:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # Derives the uncropped variant from the first thumbnail, if there is one
        if not thumbnails:
            return
        uncropped_url = _get_uncropped(thumbnails[0]['url'])
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer,
        ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info.update({
        'title': (traverse_obj(metadata_renderer, 'title')
                  or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
                  or info['id']),
        'availability': self._extract_availability(data),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_text = (
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(self._parse_time_text(last_updated_text), '%Y%m%d')

    view_count = self._get_count(playlist_stats, 1)
    if view_count is None:  # 0 is allowed
        view_count = self._get_count(playlist_header_renderer, 'viewCountText')
    if view_count is None:
        view_count = self._get_count(data, (
            'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
            'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))
    info['view_count'] = view_count

    playlist_count = self._get_count(playlist_stats, 0)
    if playlist_count is None:  # 0 is allowed
        playlist_count = self._get_count(
            playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
    info['playlist_count'] = playlist_count

    if not info.get('uploader_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            secondary_sidebar_renderer = self._extract_sidebar_info_renderer(
                data, 'playlistSidebarSecondaryInfoRenderer')
            owner = traverse_obj(
                secondary_sidebar_renderer, ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_endpoint = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'uploader_id': browse_endpoint.get('browseId'),
            'uploader_url': urljoin('https://www.youtube.com', browse_endpoint.get('canonicalBaseUrl'))
        })

    # The channel fields mirror the uploader fields
    for suffix in ('', '_id', '_url'):
        info[f'channel{suffix}'] = info[f'uploader{suffix}']
    return info

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline playlist panel, page by page.

    On every page the entries that follow the last one already yielded are
    emitted, then the 'next' endpoint is queried for the following panel.
    Iteration ends when a panel has no entries, or has nothing beyond the
    last entry that was already yielded.

    @param playlist: playlist panel renderer; replaced on each page by the
                     panel found in the API response
    @param playlist_id: playlist ID; sent as 'playlistId' and used in the
                        per-page request note
    @param data: initial response, used for account sync ID and visitor data
    @param ytcfg: ytcfg forwarded to header generation and the API request
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last entry already yielded; when that entry
        # is not in this panel the search gives -1, so start becomes 0.
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # Fall back to an empty dict when the lookup finds nothing, so that
        # the per-field defaults below apply instead of failing on `.get`.
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint']) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

4972

4973

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist renderer found in a response.

    Returns a url_result pointing at the playlist's own page when the renderer
    links to a page different from `url` and the ID is not a known-unviewable
    one; otherwise returns a playlist_result fed by the inline playlist entries.

    @param item_id: fallback ID when the renderer carries no 'playlistId'
    @param url: URL being extracted; base for resolving the playlist link
    @param data: response, used as a fallback source for the title
    @param playlist: playlist renderer dict
    @param ytcfg: ytcfg forwarded to the inline playlist extraction
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    web_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, web_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not known_unviewable
    if not should_delegate:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)

4995

4996

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public

    Signals are read from the sidebar badges, the playlist header 'privacy'
    field, the selected entry of the privacy dropdown, and the microformat
    'unlisted' flag.

    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    header = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    header_privacy = header.get('privacy')

    badges = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    selected_icon = get_first(
        (header, sidebar),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    unlisted_flag = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or header_privacy == 'PUBLIC'
            or selected_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    # Sources are consulted in priority order: header privacy (together with
    # badges), then the dropdown icon, then (for unlisted only) the microformat.
    # NOTE(review): the private/unlisted badges are only looked at when the
    # header privacy field is present - confirm this is intended.
    if header_privacy is not None:
        is_private = (
            self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
            or header_privacy == 'PRIVATE')
        is_unlisted = (
            self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
            or header_privacy == 'UNLISTED')
    elif selected_icon is not None:
        is_private = selected_icon == 'PRIVACY_PRIVATE'
        is_unlisted = selected_icon == 'PRIVACY_UNLISTED'
    else:
        is_private = None
        # None when the microformat does not say either way
        is_unlisted = unlisted_flag

    return self._availability(
        is_private=is_private,
        is_unlisted=is_unlisted,
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value among the playlist sidebar items.

    @param data: response containing ('sidebar', 'playlistSidebarRenderer', 'items')
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type passed to try_get for each looked-up value
    @returns the first matching renderer, or None if there is none
    """
    items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Returns None without making a request when `data` has neither playlist
    metadata nor a playlist header. The API request itself is non-fatal.

    @param item_id: playlist ID; requested as browse ID 'VL<item_id>'
    @param data: current response
    @param ytcfg: ytcfg forwarded to header generation and the API request
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return None

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
    return self._extract_response(
        item_id=item_id, headers=headers,
        query={
            'params': 'wgYCCAA=',
            'browseId': f'VL{item_id}'
        },
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')

5065

5066

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps

5069

5070

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying via RetryManager.

    A retry is requested for network errors other than HTTP 403/429 and when
    the initial data has none of the expected top-level keys. Any other
    ExtractorError is passed to _error_or_warning and ends the loop.

    @param url: page URL
    @param item_id: ID used for the download and extraction calls
    @param fatal: forwarded to RetryManager, initial data extraction and _error_or_warning
    @returns (webpage, data); either may still be None if the download failed
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Network errors are retried, except HTTP 403 and 429 responses
            should_retry = isinstance(cause, network_exceptions) and (
                not isinstance(cause, urllib.error.HTTPError)
                or cause.code not in (403, 429))
            if should_retry:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when `ytcfg` is truthy or the session is not authenticated.
    Otherwise raises an expected ExtractorError if `fatal` is set and
    'authcheck' is not in the 'skip' extractor argument, else only warns.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_skipped = 'authcheck' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if fatal and not authcheck_skipped:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

5109

5110

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the initial data and ytcfg for a tab/playlist URL.

    Unless the webpage is skipped, the webpage is downloaded first and ytcfg is
    taken from it when not supplied. If that yields no data, the data is
    fetched through the API instead.

    @param url: URL to extract
    @param item_id: ID used for requests
    @param ytcfg: pre-extracted ytcfg, if any
    @param fatal: whether a home-page redirect and API failures raise
    @param webpage_fatal: whether webpage download failures raise
    @param default_client: client name forwarded to the API fallback
    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)

    return data, ytcfg

5128

5129

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the API and fetch the response of the endpoint it maps to.

    The 'navigation/resolve_url' endpoint is queried first; a resolved
    'browseEndpoint' is then requested from 'browse', a 'watchEndpoint' from 'next'.

    @param url: URL to resolve
    @param item_id: ID used for requests and the warning
    @param ytcfg: ytcfg forwarded to header generation and API requests
    @param fatal: raise an expected ExtractorError if nothing resolved, instead of warning
    @param default_client: client name forwarded to header generation and API requests
    @returns the endpoint response, or None (after a warning) when nothing resolved
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query), in priority order
    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolved, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)

5146

5147

# Default for the 'params' field of search requests; _search_results falls
# back to this when its `params` argument is left at NO_DEFAULT.
_SEARCH_PARAMS = None

5148

5149

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield search result entries for `query`, one results page at a time.

    @param query: search string
    @param params: value for the request's 'params' field; falls back to
                   self._SEARCH_PARAMS when left as NO_DEFAULT and is left
                   out of the request when falsy
    @param default_client: client name forwarded to the ytcfg download,
                           header generation and API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request_data = {'query': query}
    if params:
        request_data['params'] = params

    # Locations in the response where the result list may be found
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    # Distinct top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})
    display_id = f'query "{query}"'
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-slot list handed to _extract_entries; slot 0 is read back after
    # each page (presumably filled with the next continuation - verify there).
    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        request_data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=request_data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

5184

IE_DESC = 'YouTube Tabs'

5185

_VALID_URL = r'''(?x:

5186

https?://

5187

(?!consent\.)(?:\w+\.)?

5188

(?:

5189

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

5194

(?P<not_channel>

5195

feed/|hashtag/|

5196

(?:playlist|watch)\?.*?\blist=

5197

)|

5198

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

5203

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

5204

}

5205

IE_NAME = 'youtube:tab'

5206

5207

_TESTS = [{

5208

'note': 'playlists, multipage',

5209

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

5210

'playlist_mincount': 94,

5211

'info_dict': {

5212

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5213

'title': 'Igor Kleiner - Playlists',

5214

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

5215

'uploader': 'Igor Kleiner',

5216

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5217

'channel': 'Igor Kleiner',

5218

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5219

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

5220

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5221

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5222

'channel_follower_count': int

5223

},

5224

}, {

5225

'note': 'playlists, multipage, different order',

5226

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

5227

'playlist_mincount': 94,

5228

'info_dict': {

5229

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5230

'title': 'Igor Kleiner - Playlists',

5231

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

5232

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5233

'uploader': 'Igor Kleiner',

5234

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5235

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

5236

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5237

'channel': 'Igor Kleiner',

5238

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5239

'channel_follower_count': int

5240

},

5241

}, {

5242

'note': 'playlists, series',

5243

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

5244

'playlist_mincount': 5,

5245

'info_dict': {

5246

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5247

'title': '3Blue1Brown - Playlists',

5248

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5249

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5250

'uploader': '3Blue1Brown',

5251

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5252

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5253

'channel': '3Blue1Brown',

5254

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5255

'tags': ['Mathematics'],

5256

'channel_follower_count': int

5257

},

5258

}, {

5259

'note': 'playlists, singlepage',

5260

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

5261

'playlist_mincount': 4,

5262

'info_dict': {

5263

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5264

'title': 'ThirstForScience - Playlists',

5265

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

5266

'uploader': 'ThirstForScience',

5267

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5268

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5269

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5270

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5271

'tags': 'count:13',

5272

'channel': 'ThirstForScience',

5273

'channel_follower_count': int

5274

}

5275

}, {

5276

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

5277

'only_matching': True,

5278

}, {

5279

'note': 'basic, single video playlist',

5280

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5281

'info_dict': {

5282

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5283

'uploader': 'Sergey M.',

5284

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5285

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

5290

'channel': 'Sergey M.',

5291

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5292

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5293

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5294

'availability': 'public',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

5299

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5300

'info_dict': {

5301

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5302

'uploader': 'Sergey M.',

5303

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5304

'title': 'youtube-dl empty playlist',

5305

'tags': [],

5306

'channel': 'Sergey M.',

5307

'description': '',

5308

'modified_date': '20160902',

5309

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5310

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5311

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5312

'availability': 'public',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

5318

'info_dict': {

5319

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5320

'title': 'lex will - Home',

5321

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5322

'uploader': 'lex will',

5323

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5324

'channel': 'lex will',

5325

'tags': ['bible', 'history', 'prophesy'],

5326

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5327

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5328

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5329

'channel_follower_count': int

5330

},

5331

'playlist_mincount': 2,

5332

}, {

5333

'note': 'Videos tab',

5334

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

5335

'info_dict': {

5336

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5337

'title': 'lex will - Videos',

5338

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5339

'uploader': 'lex will',

5340

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5341

'tags': ['bible', 'history', 'prophesy'],

5342

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5343

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5344

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5345

'channel': 'lex will',

5346

'channel_follower_count': int

5347

},

5348

'playlist_mincount': 975,

5349

}, {

5350

'note': 'Videos tab, sorted by popular',

5351

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

5352

'info_dict': {

5353

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5354

'title': 'lex will - Videos',

5355

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5356

'uploader': 'lex will',

5357

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5358

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5359

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5360

'channel': 'lex will',

5361

'tags': ['bible', 'history', 'prophesy'],

5362

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5363

'channel_follower_count': int

5364

},

5365

'playlist_mincount': 199,

5366

}, {

5367

'note': 'Playlists tab',

5368

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

5369

'info_dict': {

5370

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5371

'title': 'lex will - Playlists',

5372

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5373

'uploader': 'lex will',

5374

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5375

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5376

'channel': 'lex will',

5377

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5378

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5379

'tags': ['bible', 'history', 'prophesy'],

5380

'channel_follower_count': int

5381

},

5382

'playlist_mincount': 17,

5383

}, {

5384

'note': 'Community tab',

5385

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

5386

'info_dict': {

5387

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5388

'title': 'lex will - Community',

5389

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5390

'uploader': 'lex will',

5391

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5392

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5393

'channel': 'lex will',

5394

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5395

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5396

'tags': ['bible', 'history', 'prophesy'],

5397

'channel_follower_count': int

5398

},

5399

'playlist_mincount': 18,

5400

}, {

5401

'note': 'Channels tab',

5402

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

5403

'info_dict': {

5404

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5405

'title': 'lex will - Channels',

5406

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5407

'uploader': 'lex will',

5408

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5409

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5410

'channel': 'lex will',

5411

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5412

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5413

'tags': ['bible', 'history', 'prophesy'],

5414

'channel_follower_count': int

5415

},

5416

'playlist_mincount': 12,

5417

}, {

5418

'note': 'Search tab',

5419

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

5420

'playlist_mincount': 40,

5421

'info_dict': {

5422

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5423

'title': '3Blue1Brown - Search - linear algebra',

5424

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5425

'uploader': '3Blue1Brown',

5426

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5427

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5428

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5429

'tags': ['Mathematics'],

5430

'channel': '3Blue1Brown',

5431

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5432

'channel_follower_count': int

5433

},

5434

}, {

5435

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5436

'only_matching': True,

5437

}, {

5438

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5439

'only_matching': True,

5440

}, {

5441

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5442

'only_matching': True,

5443

}, {

5444

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

5445

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5446

'info_dict': {

5447

'title': '29C3: Not my department',

5448

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5449

'uploader': 'Christiaan008',

5450

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5451

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

5452

'tags': [],

5453

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5454

'view_count': int,

5455

'modified_date': '20150605',

5456

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5457

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5458

'channel': 'Christiaan008',

5459

'availability': 'public',

5460

},

5461

'playlist_count': 96,

5462

}, {

5463

'note': 'Large playlist',

5464

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

5465

'info_dict': {

5466

'title': 'Uploads from Cauchemar',

5467

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

5468

'uploader': 'Cauchemar',

5469

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5470

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

5471

'tags': [],

5472

'modified_date': r're:\d{8}',

5473

'channel': 'Cauchemar',

5474

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

5475

'view_count': int,

5476

'description': '',

5477

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5478

'availability': 'public',

5479

},

5480

'playlist_mincount': 1123,

5481

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5482

}, {

5483

'note': 'even larger playlist, 8832 videos',

5484

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

5485

'only_matching': True,

5486

}, {

5487

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

5488

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

5489

'info_dict': {

5490

'title': 'Uploads from Interstellar Movie',

5491

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

5492

'uploader': 'Interstellar Movie',

5493

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5494

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

5495

'tags': [],

5496

'view_count': int,

5497

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5498

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

5499

'channel': 'Interstellar Movie',

5500

'description': '',

5501

'modified_date': r're:\d{8}',

5502

'availability': 'public',

5503

},

5504

'playlist_mincount': 21,

5505

}, {

5506

'note': 'Playlist with "show unavailable videos" button',

5507

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

5508

'info_dict': {

5509

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

5510

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

5511

'uploader': 'Phim Siêu Nhân Nhật Bản',

5512

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5513

'view_count': int,

5514

'channel': 'Phim Siêu Nhân Nhật Bản',

5515

'tags': [],

5516

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5517

'description': '',

5518

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5519

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5520

'modified_date': r're:\d{8}',

5521

'availability': 'public',

5522

},

5523

'playlist_mincount': 200,

5524

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5525

}, {

5526

'note': 'Playlist with unavailable videos in page 7',

5527

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

5528

'info_dict': {

5529

'title': 'Uploads from BlankTV',

5530

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

5531

'uploader': 'BlankTV',

5532

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5533

'channel': 'BlankTV',

5534

'channel_url': 'https://www.youtube.com/c/blanktv',

5535

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5536

'view_count': int,

5537

'tags': [],

5538

'uploader_url': 'https://www.youtube.com/c/blanktv',

5539

'modified_date': r're:\d{8}',

5540

'description': '',

5541

'availability': 'public',

5542

},

5543

'playlist_mincount': 1000,

5544

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5545

}, {

5546

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

5547

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5548

'info_dict': {

5549

'title': 'Data Analysis with Dr Mike Pound',

5550

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5551

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5552

'uploader': 'Computerphile',

5553

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

5554

'uploader_url': 'https://www.youtube.com/user/Computerphile',

5555

'tags': [],

5556

'view_count': int,

5557

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5558

'channel_url': 'https://www.youtube.com/user/Computerphile',

5559

'channel': 'Computerphile',

5560

'availability': 'public',

5561

'modified_date': '20190712',

5562

},

5563

'playlist_mincount': 11,

5564

}, {

5565

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5566

'only_matching': True,

5567

}, {

5568

'note': 'Playlist URL that does not actually serve a playlist',

5569

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

5574

'uploader': 'STREEM',

5575

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

5576

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

5577

'upload_date': '20150526',

5578

'license': 'Standard YouTube License',

5579

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

5580

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5587

},

5588

'skip': 'This video is not available.',

5589

'add_ie': [YoutubeIE.ie_key()],

5590

}, {

5591

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5592

'only_matching': True,

5593

}, {

5594

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5595

'only_matching': True,

5596

}, {

5597

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5598

'info_dict': {

5599

'id': 'Wq15eF5vCbI', # This will keep changing

5600

'ext': 'mp4',

5601

'title': str,

5602

'uploader': 'Sky News',

5603

'uploader_id': 'skynews',

5604

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5605

'upload_date': r're:\d{8}',

5606

'description': str,

5607

'categories': ['News & Politics'],

5608

'tags': list,

5609

'like_count': int,

5610

'release_timestamp': int,

5611

'channel': 'Sky News',

5612

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5613

'age_limit': 0,

5614

'view_count': int,

5615

'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',

5616

'playable_in_embed': True,

5617

'release_date': r're:\d+',

5618

'availability': 'public',

5619

'live_status': 'is_live',

5620

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5621

'channel_follower_count': int,

5622

'concurrent_view_count': int,

5623

},

5624

'params': {

5625

'skip_download': True,

5626

},

5627

'expected_warnings': ['Ignoring subtitle tracks found in '],

5628

}, {

5629

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5634

'uploader': 'The Young Turks',

5635

'uploader_id': 'TheYoungTurks',

5636

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5637

'upload_date': '20150715',

5638

'license': 'Standard YouTube License',

5639

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5640

'categories': ['News & Politics'],

5641

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5646

},

5647

'only_matching': True,

5648

}, {

5649

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5650

'only_matching': True,

5651

}, {

5652

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5653

'only_matching': True,

5654

}, {

5655

'note': 'A channel that is not live. Should raise error',

5656

'url': 'https://www.youtube.com/user/numberphile/live',

5657

'only_matching': True,

5658

}, {

5659

'url': 'https://www.youtube.com/feed/trending',

5660

'only_matching': True,

5661

}, {

5662

'url': 'https://www.youtube.com/feed/library',

5663

'only_matching': True,

5664

}, {

5665

'url': 'https://www.youtube.com/feed/history',

5666

'only_matching': True,

5667

}, {

5668

'url': 'https://www.youtube.com/feed/subscriptions',

5669

'only_matching': True,

5670

}, {

5671

'url': 'https://www.youtube.com/feed/watch_later',

5672

'only_matching': True,

5673

}, {

5674

'note': 'Recommended - redirects to home page.',

5675

'url': 'https://www.youtube.com/feed/recommended',

5676

'only_matching': True,

5677

}, {

5678

'note': 'inline playlist with not always working continuations',

5679

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5680

'only_matching': True,

5681

}, {

5682

'url': 'https://www.youtube.com/course',

5683

'only_matching': True,

5684

}, {

5685

'url': 'https://www.youtube.com/zsecurity',

5686

'only_matching': True,

5687

}, {

5688

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5689

'only_matching': True,

5690

}, {

5691

'url': 'https://www.youtube.com/TheYoungTurks/live',

5692

'only_matching': True,

5693

}, {

5694

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 300, # not consistent but should be over 300

5701

}, {

5702

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5703

'only_matching': True,

5704

}, {

5705

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5706

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5707

'only_matching': True

5708

}, {

5709

'note': '/browse/ should redirect to /channel/',

5710

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5711

'only_matching': True

5712

}, {

5713

'note': 'VLPL, should redirect to playlist?list=PL...',

5714

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5715

'info_dict': {

5716

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5717

'uploader': 'NoCopyrightSounds',

5718

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5719

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5720

'title': 'NCS : All Releases 💿',

5721

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5722

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5723

'modified_date': r're:\d{8}',

5724

'view_count': int,

5725

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5726

'tags': [],

5727

'channel': 'NoCopyrightSounds',

5728

'availability': 'public',

5729

},

5730

'playlist_mincount': 166,

5731

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5732

}, {

5733

'note': 'Topic, should redirect to playlist?list=UU...',

5734

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5735

'info_dict': {

5736

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5737

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5738

'title': 'Uploads from Royalty Free Music - Topic',

5739

'uploader': 'Royalty Free Music - Topic',

5740

'tags': [],

5741

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5742

'channel': 'Royalty Free Music - Topic',

5743

'view_count': int,

5744

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5745

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5746

'modified_date': r're:\d{8}',

5747

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5748

'description': '',

5749

'availability': 'public',

5750

},

5751

'playlist_mincount': 101,

5752

}, {

5753

# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)

5754

# Treat as a general feed

5755

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5756

'info_dict': {

5757

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5758

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5759

'tags': [],

5760

},

5761

'playlist_mincount': 9,

5762

}, {

5763

'note': 'Youtube music Album',

5764

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5765

'info_dict': {

5766

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5767

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5772

'modified_date': r're:\d{8}',

5773

},

5774

'playlist_count': 50,

5775

}, {

5776

'note': 'unlisted single video playlist',

5777

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5778

'info_dict': {

5779

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5780

'uploader': 'colethedj',

5781

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5782

'title': 'yt-dlp unlisted playlist test',

5783

'availability': 'unlisted',

5784

'tags': [],

5785

'modified_date': '20220418',

5786

'channel': 'colethedj',

5787

'view_count': int,

5788

'description': '',

5789

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5790

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5791

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5796

'url': 'https://www.youtube.com/feed/recommended',

5797

'info_dict': {

5798

'id': 'recommended',

5799

'title': 'recommended',

5800

'tags': [],

5801

},

5802

'playlist_mincount': 50,

5803

'params': {

5804

'skip_download': True,

5805

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5806

},

5807

}, {

5808

'note': 'API Fallback: /videos tab, sorted by oldest first',

5809

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5810

'info_dict': {

5811

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5812

'title': 'Cody\'sLab - Videos',

5813

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5814

'uploader': 'Cody\'sLab',

5815

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5816

'channel': 'Cody\'sLab',

5817

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5818

'tags': [],

5819

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5820

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5821

'channel_follower_count': int

5822

},

5823

'playlist_mincount': 650,

5824

'params': {

5825

'skip_download': True,

5826

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5827

},

5828

'skip': 'Query for sorting no longer works',

5829

}, {

5830

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5831

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5832

'info_dict': {

5833

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5834

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5835

'title': 'Uploads from Royalty Free Music - Topic',

5836

'uploader': 'Royalty Free Music - Topic',

5837

'modified_date': r're:\d{8}',

5838

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5839

'description': '',

5840

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5841

'tags': [],

5842

'channel': 'Royalty Free Music - Topic',

5843

'view_count': int,

5844

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5845

'availability': 'public',

5846

},

5847

'playlist_mincount': 101,

5848

'params': {

5849

'skip_download': True,

5850

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5851

},

5852

}, {

5853

'note': 'non-standard redirect to regional channel',

5854

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5855

'only_matching': True

5856

}, {

5857

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5858

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5859

'info_dict': {

5860

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5861

'modified_date': '20220407',

5862

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5863

'tags': [],

5864

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5865

'uploader': 'pukkandan',

5866

'availability': 'unlisted',

5867

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5868

'channel': 'pukkandan',

5869

'description': 'Test for collaborative playlist',

5870

'title': 'yt-dlp test - collaborative playlist',

5871

'view_count': int,

5872

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5873

},

5874

'playlist_mincount': 2

5875

}, {

5876

'note': 'translated tab name',

5877

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',

5878

'info_dict': {

5879

'id': 'UCiu-3thuViMebBjw_5nWYrA',

5880

'tags': [],

5881

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5882

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5883

'description': 'test description',

5884

'title': 'cole-dlp-test-acc - 再生リスト',

5885

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5886

'uploader': 'cole-dlp-test-acc',

5887

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5888

'channel': 'cole-dlp-test-acc',

5889

},

5890

'playlist_mincount': 1,

5891

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5892

'expected_warnings': ['Preferring "ja"'],

5893

}, {

5894

# XXX: this should really check flat playlist entries, but the test suite doesn't support that

5895

'note': 'preferred lang set with playlist with translated video titles',

5896

'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5897

'info_dict': {

5898

'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5899

'tags': [],

5900

'view_count': int,

5901

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5902

'uploader': 'cole-dlp-test-acc',

5903

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5904

'channel': 'cole-dlp-test-acc',

5905

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5906

'description': 'test',

5907

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5908

'title': 'dlp test playlist',

5909

'availability': 'public',

5910

},

5911

'playlist_mincount': 1,

5912

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5913

'expected_warnings': ['Preferring "ja"'],

5914

}, {

5915

# shorts audio pivot for 2GtVksBMYFM.

5916

'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',

5917

'info_dict': {

5918

'id': 'sfv_audio_pivot',

5919

'title': 'sfv_audio_pivot',

5920

'tags': [],

5921

},

5922

'playlist_mincount': 50,

5923

5924

}, {

5925

# Channel with a real live tab (not to be mistaken with streams tab)

5926

# Do not treat like it should redirect to live stream

5927

'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',

5928

'info_dict': {

5929

'id': 'UCEH7P7kyJIkS_gJf93VYbmg',

5930

'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',

5931

'tags': [],

5932

},

5933

'playlist_mincount': 20,

5934

}, {

5935

# Tab name is not the same as tab id

5936

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',

5937

'info_dict': {

5938

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

5939

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',

5940

'tags': [],

5941

},

5942

'playlist_mincount': 8,

5943

}, {

5944

# Home tab id is literally home. Not to get mistaken with featured

5945

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',

5946

'info_dict': {

5947

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

5948

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',

5949

'tags': [],

5950

},

5951

'playlist_mincount': 8,

5952

}, {

5953

# Should get three playlists for videos, shorts and streams tabs

5954

'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5955

'info_dict': {

5956

'id': 'UCK9V2B22uJYu3N7eR_BT9QA',

5957

'title': 'Polka Ch. 尾丸ポルカ',

5958

'channel_follower_count': int,

5959

'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',

5960

'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5961

'uploader': 'Polka Ch. 尾丸ポルカ',

5962

'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9',

5963

'channel': 'Polka Ch. 尾丸ポルカ',

5964

'tags': 'count:35',

5965

'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5966

'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA',

},

'playlist_count': 3,

}, {

# Shorts tab with channel with handle

5971

'url': 'https://www.youtube.com/@NotJustBikes/shorts',

5972

'info_dict': {

5973

'id': 'UC0intLFzLaudFG-xAvUEO-A',

5974

'title': 'Not Just Bikes - Shorts',

5975

'tags': 'count:12',

5976

'uploader': 'Not Just Bikes',

5977

'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',

5978

'description': 'md5:7513148b1f02b924783157d84c4ea555',

5979

'channel_follower_count': int,

5980

'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',

5981

'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',

5982

'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',

5983

'channel': 'Not Just Bikes',

5984

},

5985

'playlist_mincount': 10,

5986

}, {

5987

# Streams tab

5988

'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',

5989

'info_dict': {

5990

'id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5991

'title': '中村悠一 - Live',

5992

'tags': 'count:7',

5993

'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5994

'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',

5995

'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5996

'channel': '中村悠一',

5997

'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',

5998

'channel_follower_count': int,

5999

'uploader': '中村悠一',

6000

'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',

6001

},

6002

'playlist_mincount': 60,

6003

}, {

6004

# Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.

6005

# See test_youtube_lists

6006

'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',

6007

'only_matching': True,

6008

}, {

6009

# No uploads and no UCID given. Should fail with no uploads error

6010

# See test_youtube_lists

6011

'url': 'https://www.youtube.com/news',

6012

'only_matching': True

6013

}, {

6014

# No videos tab but has a shorts tab

6015

'url': 'https://www.youtube.com/c/TKFShorts',

6016

'info_dict': {

6017

'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

6018

'title': 'Shorts Break - Shorts',

6019

'tags': 'count:32',

6020

'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

6021

'channel': 'Shorts Break',

6022

'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',

6023

'uploader': 'Shorts Break',

6024

'channel_follower_count': int,

6025

'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

6026

'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',

6027

'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',

6028

},

6029

'playlist_mincount': 30,

6030

}, {

6031

# Trending Now Tab. tab id is empty

6032

'url': 'https://www.youtube.com/feed/trending',

6033

'info_dict': {

6034

'id': 'trending',

6035

'title': 'trending - Now',

6036

'tags': [],

6037

},

6038

'playlist_mincount': 30,

6039

}, {

6040

# Trending Gaming Tab. tab id is empty

6041

'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',

6042

'info_dict': {

6043

'id': 'trending',

6044

'title': 'trending - Gaming',

6045

'tags': [],

6046

},

6047

'playlist_mincount': 30,

6048

}, {

6049

# Shorts url result in shorts tab

6050

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',

6051

'info_dict': {

6052

'id': 'UCiu-3thuViMebBjw_5nWYrA',

6053

'title': 'cole-dlp-test-acc - Shorts',

6054

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

6055

'channel': 'cole-dlp-test-acc',

6056

'description': 'test description',

6057

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6058

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

6059

'tags': [],

6060

'uploader': 'cole-dlp-test-acc',

6061

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'Youtube',

'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',

6069

'id': 'sSM9J5YH_60',

6070

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6071

'title': 'SHORT short',

6072

'channel': 'cole-dlp-test-acc',

6073

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

'view_count': int,

'thumbnails': list,

}

}],

'params': {'extract_flat': True},

6079

}, {

6080

# Live video status should be extracted

6081

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',

6082

'info_dict': {

6083

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

6084

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live

'tags': []

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'Youtube',

'url': 'startswith:https://www.youtube.com/watch?v=',

6092

'id': str,

6093

'title': str,

6094

'live_status': 'is_live',

6095

'channel_id': str,

6096

'channel_url': str,

6097

'concurrent_view_count': int,

'channel': str,

}

}],

'params': {'extract_flat': True, 'playlist_items': '1'},

6102

'playlist_mincount': 1

6103

}, {

6104

# Channel renderer metadata. Contains number of videos on the channel

6105

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',

6106

'info_dict': {

6107

'id': 'UCiu-3thuViMebBjw_5nWYrA',

6108

'title': 'cole-dlp-test-acc - Channels',

6109

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

6110

'channel': 'cole-dlp-test-acc',

6111

'description': 'test description',

6112

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6113

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

6114

'tags': [],

6115

'uploader': 'cole-dlp-test-acc',

6116

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'YoutubeTab',

6123

'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6124

'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6125

'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6126

'title': 'PewDiePie',

6127

'channel': 'PewDiePie',

6128

'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6129

'thumbnails': list,

6130

'channel_follower_count': int,

6131

'playlist_count': int

6132

}

6133

}],

6134

'params': {'extract_flat': True},

6135

}, {

6136

'url': 'https://www.youtube.com/@3blue1brown/about',

6137

'info_dict': {

6138

'id': 'UCYO_jab_esuFRV4b17AJtAw',

6139

'tags': ['Mathematics'],

6140

'title': '3Blue1Brown - About',

6141

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

6142

'channel_follower_count': int,

6143

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

6144

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

6145

'channel': '3Blue1Brown',

6146

'uploader': '3Blue1Brown',

6147

'view_count': int,

6148

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

6149

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

},

'playlist_count': 0,

}]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that ``YoutubeIE`` already claims is rejected here; for
    everything else the decision is delegated to the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

6157

6158

# Splits a URL into named groups on top of those defined by `_VALID_URL`:
#   pre  - the part matched by `_VALID_URL` itself
#   tab  - an optional single `/segment` (no `?`, `#` or `/` inside); the conditional
#          `(?(not_channel)|...)` only attempts it when the `not_channel` group did not match
#   post - everything that remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')

6159

6160

def _get_url_mobj(self, url):
    """Match *url* against ``_URL_RE`` and return its named groups as a dict.

    Groups that did not take part in the match are reported as ``''``
    instead of ``None`` so callers can slice/concatenate them directly.
    """
    groups = self._URL_RE.match(url).groupdict()
    return {name: '' if value is None else value for name, value in groups.items()}

6164

6165

def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a ``(tab_id, tab_name)`` pair for a tab renderer dict.

    ``tab_name`` is the lower-cased ``title`` of *tab* (``''`` when absent).
    ``tab_id`` is taken, in order of preference, from the tab segment of the
    tab's endpoint URL (resolved against *base_url*), from ``tabIdentifier``,
    or - as a last resort - from the tab name itself.
    """
    tab_name = (tab.get('title') or '').lower()
    endpoint_path = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_path)

    tab_id = None
    if tab_url:
        # The 'tab' group includes its leading slash; drop it
        tab_id = self._get_url_mobj(tab_url)['tab'][1:]
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if tab_id:
        id_aliases = {'TAB_ID_SPONSORSHIPS': 'membership'}
        return id_aliases.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    name_aliases = {'home': 'featured', 'live': 'streams'}
    return name_aliases.get(tab_name, tab_name), tab_name

6186

6187

def _has_tab(self, tabs, tab_id):
    """Return True if any renderer in *tabs* resolves to the id *tab_id*."""
    for candidate in tabs:
        found_id, _ = self._extract_tab_id_and_name(candidate)
        if found_id == tab_id:
            return True
    return False

6189

6190

# Extract a channel tab, feed or playlist page.
#
# Parameters:
#   url           - the page URL; its host is rewritten to www.youtube.com before use.
#   smuggled_data - mapping passed alongside the URL; only `.get('is_music_url')` is read here.
#
# Returns one of:
#   * a url_result pointing at another YoutubeTabIE / YoutubeIE URL (music redirects,
#     watch URLs without a video id, single-video fallback, conditional regional redirect),
#   * the info dict built by _extract_from_tabs when only one tab is extracted,
#   * a playlist_result wrapping several tab results (videos/streams/shorts),
#   * the result of _extract_from_playlist for an inline (watch page) playlist.
#
# Raises ExtractorError when the page cannot be resolved or recognised, and UserNotLive
# when a /live tab was requested but a different tab was selected.
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data

6191

def _real_extract(self, url, smuggled_data):

6192

item_id = self._match_id(url)

6193

# Force the host to www.youtube.com while keeping every other URL component
url = urllib.parse.urlunparse(

6194

urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))

6195

compat_opts = self.get_param('compat_opts', [])

6196

6197

mobj = self._get_url_mobj(url)

6198

# `is_channel` is true whenever the `not_channel` group is empty
pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']

6199

if is_channel and smuggled_data.get('is_music_url'):

6200

if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist

6201

return self.url_result(

6202

f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])

6203

elif item_id[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist

6204

mdata = self._extract_tab_endpoint(

6205

f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')

6206

murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),

6207

get_all=False, expected_type=str)

6208

if not murl:

6209

raise ExtractorError('Failed to resolve album to playlist')

6210

return self.url_result(murl, YoutubeTabIE)

6211

elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/

6212

return self.url_result(

6213

f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

6214

6215

# `tab` still carries its leading slash; strip it to get the requested tab id
original_tab_id, display_id = tab[1:], f'{item_id}{tab}'

6216

if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:

6217

# No tab requested: fetch the /videos tab instead
url = f'{pre}/videos{post}'

6218

if smuggled_data.get('is_music_url'):

6219

self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')

6220

6221

# Handle both video/playlist URLs

6222

qs = parse_qs(url)

6223

video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]

6224

if not video_id and mobj['not_channel'].startswith('watch'):

6225

if not playlist_id:

6226

# If there is neither video or playlist ids, youtube redirects to home page, which is undesirable

6227

raise ExtractorError('A video URL was given without video ID', expected=True)

6228

# Common mistake: https://www.youtube.com/watch?list=playlist_id

6229

self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')

6230

return self.url_result(

6231

f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

6232

6233

# When _yes_playlist() declines the playlist, hand the single video over to YoutubeIE
if not self._yes_playlist(playlist_id, video_id):

6234

return self.url_result(

6235

f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

6236

6237

data, ytcfg = self._extract_data(url, display_id)

6238

6239

# YouTube may provide a non-standard redirect to the regional channel

6240

# See: https://github.com/yt-dlp/yt-dlp/issues/2694

6241

# https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects

6242

redirect_url = traverse_obj(

6243

data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)

6244

if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:

6245

# Re-append the originally requested tab and trailing part to the redirect target
redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))

6246

self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')

6247

return self.url_result(redirect_url, YoutubeTabIE)

6248

6249

# `extra_tabs` collects URLs of additional tabs (streams/shorts) to extract besides the main one
tabs, extra_tabs = self._extract_tab_renderers(data), []

6250

if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:

6251

selected_tab = self._extract_selected_tab(tabs)

6252

selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url) # NB: Name may be translated

6253

self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

6254

6255

if not original_tab_id and selected_tab_name:

6256

self.to_screen('Downloading all uploads of the channel. '

6257

'To download only the videos in a specific tab, pass the tab\'s URL')

6258

if self._has_tab(tabs, 'streams'):

6259

extra_tabs.append(''.join((pre, '/streams', post)))

6260

if self._has_tab(tabs, 'shorts'):

6261

extra_tabs.append(''.join((pre, '/shorts', post)))

6262

# XXX: Members-only tab should also be extracted

6263

6264

if not extra_tabs and selected_tab_id != 'videos':

6265

# Channel does not have streams, shorts or videos tabs

6266

if item_id[:2] != 'UC':

6267

raise ExtractorError('This channel has no uploads', expected=True)

6268

6269

# Topic channels don't have /videos. Use the equivalent playlist instead

6270

pl_id = f'UU{item_id[2:]}'

6271

pl_url = f'https://www.youtube.com/playlist?list={pl_id}'

6272

try:

6273

data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)

6274

except ExtractorError:

6275

raise ExtractorError('This channel has no uploads', expected=True)

6276

else:

6277

item_id, url = pl_id, pl_url

6278

self.to_screen(

6279

f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

6280

6281

elif extra_tabs and selected_tab_id != 'videos':

6282

# When there are shorts/live tabs but not videos tab

6283

# Clearing `data` leaves only the extra tabs to be extracted further below
url, data = f'{pre}{post}', None

6284

6285

elif (original_tab_id or 'videos') != selected_tab_id:

6286

if original_tab_id == 'live':

6287

# Live tab should have redirected to the video

6288

# Except in the case the channel has an actual live tab

6289

# Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live

6290

raise UserNotLive(video_id=item_id)

6291

elif selected_tab_name:

6292

raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

6293

6294

# For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg

6295

url = f'{pre}{post}'

6296

6297

# YouTube sometimes provides a button to reload playlist with unavailable videos.

6298

if 'no-youtube-unavailable-videos' not in compat_opts:

6299

data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data

6300

self._extract_and_report_alerts(data, only_once=True)

6301

6302

# Tabs are re-read because `data` may have been replaced (or cleared) above
tabs, entries = self._extract_tab_renderers(data), []

6303

if tabs:

6304

entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]

6305

entries[0].update({

6306

'extractor_key': YoutubeTabIE.ie_key(),

6307

'extractor': YoutubeTabIE.IE_NAME,

6308

'webpage_url': url,

6309

})

6310

if self.get_param('playlist_items') == '0':

6311

entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)

6312

else: # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`

6313

entries.extend(map(self._real_extract, extra_tabs))

6314

6315

# A single result is returned as-is; several are wrapped into one playlist of playlists
if len(entries) == 1:

6316

return entries[0]

6317

elif entries:

6318

metadata = self._extract_metadata_from_tabs(item_id, data)

6319

uploads_url = 'the Uploads (UU) playlist URL'

6320

if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):

6321

uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'

6322

self.to_screen(

6323

'Downloading as multiple playlists, separated by tabs. '

6324

f'To download as a single playlist instead, pass {uploads_url}')

6325

return self.playlist_result(entries, item_id, **metadata)

6326

6327

# Inline playlist

6328

playlist = traverse_obj(

6329

data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)

6330

if playlist:

6331

return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

6332

6333

# Last resort: fall back to a single video, preferring the id reported by the page over the `v` query value
video_id = traverse_obj(

6334

data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id

6335

if video_id:

6336

if tab != '/live': # live tab is expected to redirect to video

6337

self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')

6338

return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

6339

6340

raise ExtractorError('Unable to recognize tab page')

6341

6342

6343

class YoutubePlaylistIE(InfoExtractor):

6344

IE_DESC = 'YouTube playlists'

6345

_VALID_URL = r'''(?x)(?:

(?:https?://)?

(?:\w+\.)?

(?:

(?:

youtube(?:kids)?\.com|

%(invidious)s

)

/.*?\?.*?\blist=

)?

(?P<id>%(playlist_id)s)

6356

)''' % {

6357

'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,

6358

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

6359

}

6360

IE_NAME = 'youtube:playlist'

6361

_TESTS = [{

6362

'note': 'issue #673',

6363

'url': 'PLBB231211A4F62143',

6364

'info_dict': {

6365

'title': '[OLD]Team Fortress 2 (Class-based LP)',

6366

'id': 'PLBB231211A4F62143',

6367

'uploader': 'Wickman',

6368

'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',

6369

'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',

6370

'view_count': int,

6371

'uploader_url': 'https://www.youtube.com/c/WickmanVT',

6372

'modified_date': r're:\d{8}',

6373

'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',

6374

'channel': 'Wickman',

6375

'tags': [],

6376

'channel_url': 'https://www.youtube.com/c/WickmanVT',

6377

'availability': 'public',

6378

},

6379

'playlist_mincount': 29,

6380

}, {

6381

'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',

6382

'info_dict': {

6383

'title': 'YDL_safe_search',

6384

'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',

6385

},

6386

'playlist_count': 2,

6387

'skip': 'This playlist is private',

6388

}, {

6389

'note': 'embedded',

6390

'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',

'playlist_count': 4,

'info_dict': {

'title': 'JODA15',

'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',

6395

'uploader': 'milan',

6396

'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',

6397

'description': '',

6398

'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',

6399

'tags': [],

6400

'modified_date': '20140919',

6401

'view_count': int,

6402

'channel': 'milan',

6403

'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',

6404

'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',

6405

'availability': 'public',

6406

},

6407

'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],

6408

}, {

6409

'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',

6410

'playlist_mincount': 455,

6411

'info_dict': {

6412

'title': '2018 Chinese New Singles (11/6 updated)',

6413

'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',

6414

'uploader': 'LBK',

6415

'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',

6416

'description': 'md5:da521864744d60a198e3a88af4db0d9d',

6417

'channel': 'LBK',

6418

'view_count': int,

6419

'channel_url': 'https://www.youtube.com/c/愛低音的國王',

6420

'tags': [],

6421

'uploader_url': 'https://www.youtube.com/c/愛低音的國王',

6422

'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',

6423

'modified_date': r're:\d{8}',

6424

'availability': 'public',

6425

},

6426

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

6427

}, {

6428

'url': 'TLGGrESM50VT6acwMjAyMjAxNw',

6429

'only_matching': True,

6430

}, {

6431

# music album playlist

6432

'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',

6433

'only_matching': True,

}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs accepted by YoutubeTabIE and URLs carrying a non-empty ``v`` query
    parameter are rejected; any other URL is decided by the parent class.
    """
    if YoutubeTabIE.suitable(url):
        return False
    # NOTE(review): parse_qs is also imported at module level; this local
    # import is presumably deliberate - confirm before hoisting/removing it.
    from ..utils import parse_qs
    qs = parse_qs(url)
    if qs.get('v', [None])[0]:
        return False
    return super().suitable(url)

6445

6446

def _real_extract(self, url):
    """Delegate to YoutubeTabIE through an equivalent ``/playlist`` URL.

    The query string of *url* is carried over; when it is empty, the matched
    playlist id is sent as ``list`` instead. If
    ``YoutubeBaseInfoExtractor.is_music_url(url)`` is truthy, that fact is
    smuggled into the resulting URL as ``is_music_url``.
    """
    playlist_id = self._match_id(url)
    # Must be evaluated before `url` is rebound to the youtube.com playlist URL
    is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
    url = update_url_query(
        'https://www.youtube.com/playlist',
        parse_qs(url) or {'list': playlist_id})
    if is_music_url:
        url = smuggle_url(url, {'is_music_url': True})
    return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6455

6456

6457

class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list`` parameter.

    The video id and playlist id are taken from the URL and the request is
    handed to YoutubeTabIE as an equivalent ``/watch`` URL.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link to a ``/watch`` URL and defer to YoutubeTabIE."""
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6506

6507

6508

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Handle ``/embed/live_stream?channel=...`` URLs.

    The ``channel`` query value is used to build the channel's ``/live`` URL,
    which is then delegated to YoutubeTabIE.
    """
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result pointing at ``/channel/<id>/live``."""
        channel_id = self._match_id(url)
        return self.url_result(
            f'https://www.youtube.com/channel/{channel_id}/live',
            ie=YoutubeTabIE.ie_key(), video_id=channel_id)

6521

6522

6523

class YoutubeYtUserIE(InfoExtractor):
    """Handle the ``ytuser:<name>`` pseudo-URL by delegating to YoutubeTabIE."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for ``https://www.youtube.com/user/<name>``."""
        user_id = self._match_id(url)
        return self.url_result(f'https://www.youtube.com/user/{user_id}', YoutubeTabIE, user_id)

6535

6536

6537

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Handle the ``:ytfav`` keyword family by delegating the ``LL`` playlist to YoutubeTabIE."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the ``list=LL`` playlist URL."""
        return self.url_result(
            'https://www.youtube.com/playlist?list=LL',
            ie=YoutubeTabIE.ie_key())

6554

6555

6556

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Expose the notification menu as a playlist (``:ytnotif`` keyword)."""
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield entries for one notification-menu response.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last
        ``continuationItemRenderer`` found among the items, if any.
        """
        # Two alternative paths: the initial popup and a continuation page
        notification_list = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for item in notification_list:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            continuation = item.get('continuationItemRenderer')
            if continuation:
                continuation_list[0] = continuation

    def _extract_notification_renderer(self, notification):
        """Convert one ``notificationRenderer`` into a ``url`` result dict.

        Notifications with a watch endpoint point at the video. Otherwise a
        community-post URL is built from the browse endpoint; if either the
        channel id or the post id is missing, None is returned.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        # Only meaningful when video_id is set; replaced below otherwise
        url = f'https://www.youtube.com/watch?v={video_id}'
        channel_id = None
        if not video_id:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not channel_id or not post_id:
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)
        # The timestamp is only filled in when the `approximate_date`
        # extractor argument of YoutubeTabIE is set
        timestamp = (self._parse_time_text(self._get_text(notification, 'sentTimeText'))
                     if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
                     else None)
        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is left."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            # `response` on the right-hand side is the previous page (None on
            # the first iteration) and is only used for the visitor data
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a playlist with id and title 'notifications'."""
        display_id = 'notifications'
        ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)

6645

6646

6647

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key."""
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle ``youtube.com/results`` and ``youtube.com/search`` URLs.

    The search terms come from the ``search_query`` (or ``q``) parameter and
    the optional ``sp`` parameter is forwarded to the search as-is.
    """
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'playlist_count': int,  # XXX: should have a way of saying > 1
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list
            }
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist named after the query."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q` when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        return self.playlist_result(self._search_results(query, qs.get('sp', (None,))[0]), query, query)

6740

6741

6742

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle ``music.youtube.com/search`` URLs.

    A result section can be selected either with an explicit ``sp`` query
    parameter or with a URL fragment naming one of the keys of ``_SECTIONS``.
    """
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (lower case) -> `sp` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the search results as a playlist titled ``<query>[ - <section>]``."""
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Reverse lookup of the section name; an unknown `sp` value is
            # used verbatim as the section label
            section = next((k for k, v in self._SECTIONS.items() if v == params), params)
        else:
            # No `sp`: take the section from the URL fragment ('' if absent)
            section = urllib.parse.unquote_plus((url.split('#') + [''])[1]).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        return self.playlist_result(self._search_results(query, params, default_client='web_music'), title, title)

6793

6794

6795

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Base class for feed extractors
    Subclasses must re-define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        """Run the login check of YoutubeBaseInfoExtractor on this instance."""
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(self):
        """Extractor name derived from ``_FEED_NAME`` (``youtube:<feed name>``)."""
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate ``/feed/<_FEED_NAME>`` to YoutubeTabIE."""
        return self.url_result(
            f'https://www.youtube.com/feed/{self._FEED_NAME}', ie=YoutubeTabIE.ie_key())

6813

6814

6815

class YoutubeWatchLaterIE(InfoExtractor):
    """Handle the ``:ytwatchlater`` keyword by delegating the ``WL`` playlist to YoutubeTabIE."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the ``list=WL`` playlist URL."""
        return self.url_result(
            'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())

6827

6828

6829

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed.

    Matches the bare youtube.com home page URL as well as the ``:ytrec`` /
    ``:ytrecommended`` keywords; ``_LOGIN_REQUIRED`` is overridden to False.
    """
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed (``:ytsubs`` keyword family)."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed (``:ythis`` / ``:ythistory`` keywords)."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeStoriesIE(InfoExtractor):
    """Handle the ``ytstories:<channel id>`` pseudo-URL.

    The part of the channel id after its ``UC`` prefix is prefixed with
    ``RLTD`` to form a playlist id, which is delegated to YoutubeTabIE with
    ``is_story`` smuggled into the URL.
    """
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the derived ``RLTD...`` playlist."""
        playlist_id = f'RLTD{self._match_id(url)}'
        return self.url_result(
            smuggle_url(f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True}),
            ie=YoutubeTabIE, video_id=playlist_id)

6883

6884

6885

class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Handle ``/source/<video id>/shorts`` URLs via the ``sfv_audio_pivot`` feed."""
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    IE_NAME = 'youtube:shorts:pivot:audio'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Generates sfv_audio_pivot browse params for this video id

        The encoded video id is inserted three times into a fixed byte
        template, which is then base64-encoded and URL-quoted.
        """
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % ((video_id.encode(),) * 3)
        return urllib.parse.quote(base64.b64encode(pb_params).decode())

    def _real_extract(self, url):
        """Delegate the ``sfv_audio_pivot`` feed URL for this video to YoutubeTabIE."""
        video_id = self._match_id(url)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={self._generate_audio_pivot_params(video_id)}',
            ie=YoutubeTabIE)

class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch/attribution URLs that end without a video id.

    Extraction always fails with an expected error that hints at quoting the
    URL on the command line.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause."""
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Handle ``/clip/<id>`` URLs.

    The clip page is used to find the underlying video id and the clip's
    start/end times; the video itself is delegated to YoutubeIE as a
    ``url_transparent`` result.
    """
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a url_transparent result for the clip's base video.

        Raises ExtractorError when the page data contains neither a video id
        nor the clip's loop command (start/end times).
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        # Without this guard a missing loop command surfaces as a bare
        # TypeError from subscripting None below
        if not clip_data:
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Millisecond values converted to seconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }

class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    """Follow ``consent.youtube.com/m?`` redirects to their ``continue`` target."""
    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'http://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """Return a url_result for the last ``continue`` query value.

        Raises an expected ExtractorError when that value is missing or is
        rejected by url_or_none.
        """
        redirect_url = url_or_none(parse_qs(url).get('continue', [None])[-1])
        if not redirect_url:
            raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
        return self.url_result(redirect_url)

7059

7060

7061

class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose ``v`` value is only 1-10 characters long.

    Extraction always fails with an expected error reporting the incomplete id.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the truncated id."""
        video_id = self._match_id(url)
        raise ExtractorError(
            f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.',
            expected=True)