jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import collections
	4	import copy
	5	import datetime
	6	import enum
	7	import hashlib
	8	import itertools
	9	import json
	10	import math
	11	import os.path
	12	import random
	13	import re
	14	import sys
	15	import threading
	16	import time
	17	import traceback
	18	import urllib.error
	19	import urllib.parse
	20
	21	from .common import InfoExtractor, SearchInfoExtractor
	22	from .openload import PhantomJSwrapper
	23	from ..compat import functools
	24	from ..jsinterp import JSInterpreter
	25	from ..utils import (
	26	NO_DEFAULT,
	27	ExtractorError,
	28	LazyList,
	29	UserNotLive,
	30	bug_reports_message,
	31	classproperty,
	32	clean_html,
	33	datetime_from_str,
	34	dict_get,
	35	filter_dict,
	36	float_or_none,
	37	format_field,
	38	get_first,
	39	int_or_none,
	40	is_html,
	41	join_nonempty,
	42	js_to_json,
	43	mimetype2ext,
	44	network_exceptions,
	45	orderedSet,
	46	parse_codecs,
	47	parse_count,
	48	parse_duration,
	49	parse_iso8601,
	50	parse_qs,
	51	qualities,
	52	remove_start,
	53	smuggle_url,
	54	str_or_none,
	55	str_to_int,
	56	strftime_or_none,
	57	traverse_obj,
	58	try_get,
	59	unescapeHTML,
	60	unified_strdate,
	61	unified_timestamp,
	62	unsmuggle_url,
	63	update_url_query,
	64	url_or_none,
	65	urljoin,
	66	variadic,
	67	)
	68
	69	# any clients starting with _ cannot be explicitly requested by the user
	70	INNERTUBE_CLIENTS = {
	71	'web': {
	72	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	73	'INNERTUBE_CONTEXT': {
	74	'client': {
	75	'clientName': 'WEB',
	76	'clientVersion': '2.20220801.00.00',
	77	}
	78	},
	79	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	80	},
	81	'web_embedded': {
	82	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	83	'INNERTUBE_CONTEXT': {
	84	'client': {
	85	'clientName': 'WEB_EMBEDDED_PLAYER',
	86	'clientVersion': '1.20220731.00.00',
	87	},
	88	},
	89	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	90	},
	91	'web_music': {
	92	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	93	'INNERTUBE_HOST': 'music.youtube.com',
	94	'INNERTUBE_CONTEXT': {
	95	'client': {
	96	'clientName': 'WEB_REMIX',
	97	'clientVersion': '1.20220727.01.00',
	98	}
	99	},
	100	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	101	},
	102	'web_creator': {
	103	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	104	'INNERTUBE_CONTEXT': {
	105	'client': {
	106	'clientName': 'WEB_CREATOR',
	107	'clientVersion': '1.20220726.00.00',
	108	}
	109	},
	110	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	111	},
	112	'android': {
	113	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	114	'INNERTUBE_CONTEXT': {
	115	'client': {
	116	'clientName': 'ANDROID',
	117	'clientVersion': '17.31.35',
	118	'androidSdkVersion': 30,
	119	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	120	}
	121	},
	122	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	123	'REQUIRE_JS_PLAYER': False
	124	},
	125	'android_embedded': {
	126	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	127	'INNERTUBE_CONTEXT': {
	128	'client': {
	129	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	130	'clientVersion': '17.31.35',
	131	'androidSdkVersion': 30,
	132	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	133	},
	134	},
	135	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	136	'REQUIRE_JS_PLAYER': False
	137	},
	138	'android_music': {
	139	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	140	'INNERTUBE_CONTEXT': {
	141	'client': {
	142	'clientName': 'ANDROID_MUSIC',
	143	'clientVersion': '5.16.51',
	144	'androidSdkVersion': 30,
	145	'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
	146	}
	147	},
	148	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	149	'REQUIRE_JS_PLAYER': False
	150	},
	151	'android_creator': {
	152	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	153	'INNERTUBE_CONTEXT': {
	154	'client': {
	155	'clientName': 'ANDROID_CREATOR',
	156	'clientVersion': '22.30.100',
	157	'androidSdkVersion': 30,
	158	'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
	159	},
	160	},
	161	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	162	'REQUIRE_JS_PLAYER': False
	163	},
	164	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	165	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	166	'ios': {
	167	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	168	'INNERTUBE_CONTEXT': {
	169	'client': {
	170	'clientName': 'IOS',
	171	'clientVersion': '17.33.2',
	172	'deviceModel': 'iPhone14,3',
	173	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	174	}
	175	},
	176	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	177	'REQUIRE_JS_PLAYER': False
	178	},
	179	'ios_embedded': {
	180	'INNERTUBE_CONTEXT': {
	181	'client': {
	182	'clientName': 'IOS_MESSAGES_EXTENSION',
	183	'clientVersion': '17.33.2',
	184	'deviceModel': 'iPhone14,3',
	185	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	186	},
	187	},
	188	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	189	'REQUIRE_JS_PLAYER': False
	190	},
	191	'ios_music': {
	192	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	193	'INNERTUBE_CONTEXT': {
	194	'client': {
	195	'clientName': 'IOS_MUSIC',
	196	'clientVersion': '5.21',
	197	'deviceModel': 'iPhone14,3',
	198	'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	199	},
	200	},
	201	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	202	'REQUIRE_JS_PLAYER': False
	203	},
	204	'ios_creator': {
	205	'INNERTUBE_CONTEXT': {
	206	'client': {
	207	'clientName': 'IOS_CREATOR',
	208	'clientVersion': '22.33.101',
	209	'deviceModel': 'iPhone14,3',
	210	'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	211	},
	212	},
	213	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	214	'REQUIRE_JS_PLAYER': False
	215	},
	216	# mweb has 'ultralow' formats
	217	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	218	'mweb': {
	219	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	220	'INNERTUBE_CONTEXT': {
	221	'client': {
	222	'clientName': 'MWEB',
	223	'clientVersion': '2.20220801.00.00',
	224	}
	225	},
	226	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	227	},
	228	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	229	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	230	'tv_embedded': {
	231	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	232	'INNERTUBE_CONTEXT': {
	233	'client': {
	234	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	235	'clientVersion': '2.0',
	236	},
	237	},
	238	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	239	},
	240	}
	241
	242
	243	def _split_innertube_client(client_name):
	244	variant, *base = client_name.rsplit('.', 1)
	245	if base:
	246	return variant, base[0], variant
	247	base, *variant = client_name.split('_', 1)
	248	return client_name, base, variant[0] if variant else None
	249
	250
	251	def build_innertube_clients():
	252	THIRD_PARTY = {
	253	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	254	}
	255	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	256	priority = qualities(BASE_CLIENTS[::-1])
	257
	258	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	259	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	260	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	261	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	262	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	263
	264	_, base_client, variant = _split_innertube_client(client)
	265	ytcfg['priority'] = 10 * priority(base_client)
	266
	267	if not variant:
	268	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	269	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	270	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	271	embedscreen['priority'] -= 3
	272	elif variant == 'embedded':
	273	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	274	ytcfg['priority'] -= 2
	275	else:
	276	ytcfg['priority'] -= 3
	277
	278
	279	build_innertube_clients()
	280
	281
	282	class BadgeType(enum.Enum):
	283	AVAILABILITY_UNLISTED = enum.auto()
	284	AVAILABILITY_PRIVATE = enum.auto()
	285	AVAILABILITY_PUBLIC = enum.auto()
	286	AVAILABILITY_PREMIUM = enum.auto()
	287	AVAILABILITY_SUBSCRIPTION = enum.auto()
	288	LIVE_NOW = enum.auto()
	289
	290
	291	class YoutubeBaseInfoExtractor(InfoExtractor):
	292	"""Provide base functions for Youtube extractors"""
	293
	294	_RESERVED_NAMES = (
	295	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|live\|watch_popup\|clip\|'
	296	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	297	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|source\|'
	298	r'storefront\|oops\|index\|account\|t/terms\|about\|upload\|signin\|logout')
	299
	300	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	301
	302	# _NETRC_MACHINE = 'youtube'
	303
	304	# If True it will raise an error if no login info is provided
	305	_LOGIN_REQUIRED = False
	306
	307	_INVIDIOUS_SITES = (
	308	# invidious-redirect websites
	309	r'(?:www\.)?redirect\.invidious\.io',
	310	r'(?:(?:www\|dev)\.)?invidio\.us',
	311	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	312	r'(?:www\.)?invidious\.pussthecat\.org',
	313	r'(?:www\.)?invidious\.zee\.li',
	314	r'(?:www\.)?invidious\.ethibox\.fr',
	315	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	316	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	317	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	318	# youtube-dl invidious instances list
	319	r'(?:(?:www\|no)\.)?invidiou\.sh',
	320	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	321	r'(?:www\.)?invidious\.kabi\.tk',
	322	r'(?:www\.)?invidious\.mastodon\.host',
	323	r'(?:www\.)?invidious\.zapashcanon\.fr',
	324	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	325	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	326	r'(?:www\.)?invidious\.himiko\.cloud',
	327	r'(?:www\.)?invidious\.reallyancient\.tech',
	328	r'(?:www\.)?invidious\.tube',
	329	r'(?:www\.)?invidiou\.site',
	330	r'(?:www\.)?invidious\.site',
	331	r'(?:www\.)?invidious\.xyz',
	332	r'(?:www\.)?invidious\.nixnet\.xyz',
	333	r'(?:www\.)?invidious\.048596\.xyz',
	334	r'(?:www\.)?invidious\.drycat\.fr',
	335	r'(?:www\.)?inv\.skyn3t\.in',
	336	r'(?:www\.)?tube\.poal\.co',
	337	r'(?:www\.)?tube\.connect\.cafe',
	338	r'(?:www\.)?vid\.wxzm\.sx',
	339	r'(?:www\.)?vid\.mint\.lgbt',
	340	r'(?:www\.)?vid\.puffyan\.us',
	341	r'(?:www\.)?yewtu\.be',
	342	r'(?:www\.)?yt\.elukerio\.org',
	343	r'(?:www\.)?yt\.lelux\.fi',
	344	r'(?:www\.)?invidious\.ggc-project\.de',
	345	r'(?:www\.)?yt\.maisputain\.ovh',
	346	r'(?:www\.)?ytprivate\.com',
	347	r'(?:www\.)?invidious\.13ad\.de',
	348	r'(?:www\.)?invidious\.toot\.koeln',
	349	r'(?:www\.)?invidious\.fdn\.fr',
	350	r'(?:www\.)?watch\.nettohikari\.com',
	351	r'(?:www\.)?invidious\.namazso\.eu',
	352	r'(?:www\.)?invidious\.silkky\.cloud',
	353	r'(?:www\.)?invidious\.exonip\.de',
	354	r'(?:www\.)?invidious\.riverside\.rocks',
	355	r'(?:www\.)?invidious\.blamefran\.net',
	356	r'(?:www\.)?invidious\.moomoo\.de',
	357	r'(?:www\.)?ytb\.trom\.tf',
	358	r'(?:www\.)?yt\.cyberhost\.uk',
	359	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	360	r'(?:www\.)?qklhadlycap4cnod\.onion',
	361	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	362	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	363	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	364	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	365	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	366	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	367	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	368	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	369	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	370	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	371	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	372	r'(?:www\.)?piped\.kavin\.rocks',
	373	r'(?:www\.)?piped\.tokhmi\.xyz',
	374	r'(?:www\.)?piped\.syncpundit\.io',
	375	r'(?:www\.)?piped\.mha\.fi',
	376	r'(?:www\.)?watch\.whatever\.social',
	377	r'(?:www\.)?piped\.garudalinux\.org',
	378	r'(?:www\.)?piped\.rivo\.lol',
	379	r'(?:www\.)?piped-libre\.kavin\.rocks',
	380	r'(?:www\.)?yt\.jae\.fi',
	381	r'(?:www\.)?piped\.mint\.lgbt',
	382	r'(?:www\.)?il\.ax',
	383	r'(?:www\.)?piped\.esmailelbob\.xyz',
	384	r'(?:www\.)?piped\.projectsegfau\.lt',
	385	r'(?:www\.)?piped\.privacydev\.net',
	386	r'(?:www\.)?piped\.palveluntarjoaja\.eu',
	387	r'(?:www\.)?piped\.smnz\.de',
	388	r'(?:www\.)?piped\.adminforge\.de',
	389	r'(?:www\.)?watch\.whatevertinfoil\.de',
	390	r'(?:www\.)?piped\.qdi\.fi',
	391	r'(?:www\.)?piped\.video',
	392	r'(?:www\.)?piped\.aeong\.one',
	393	)
	394
	395	# extracted from account/account_menu ep
	396	# XXX: These are the supported YouTube UI and API languages,
	397	# which is slightly different from languages supported for translation in YouTube studio
	398	_SUPPORTED_LANG_CODES = [
	399	'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
	400	'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
	401	'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
	402	'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
	403	'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
	404	'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
	405	]
	406
	407	_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
	408
	409	@functools.cached_property
	410	def _preferred_lang(self):
	411	"""
	412	Returns a language code supported by YouTube for the user preferred language.
	413	Returns None if no preferred language set.
	414	"""
	415	preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
	416	if not preferred_lang:
	417	return
	418	if preferred_lang not in self._SUPPORTED_LANG_CODES:
	419	raise ExtractorError(
	420	f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
	421	expected=True)
	422	elif preferred_lang != 'en':
	423	self.report_warning(
	424	f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
	425	return preferred_lang
	426
	427	def _initialize_consent(self):
	428	cookies = self._get_cookies('https://www.youtube.com/')
	429	if cookies.get('__Secure-3PSID'):
	430	return
	431	consent_id = None
	432	consent = cookies.get('CONSENT')
	433	if consent:
	434	if 'YES' in consent.value:
	435	return
	436	consent_id = self._search_regex(
	437	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	438	if not consent_id:
	439	consent_id = random.randint(100, 999)
	440	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	441
	442	def _initialize_pref(self):
	443	cookies = self._get_cookies('https://www.youtube.com/')
	444	pref_cookie = cookies.get('PREF')
	445	pref = {}
	446	if pref_cookie:
	447	try:
	448	pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
	449	except ValueError:
	450	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	451	pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
	452	self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
	453
	454	def _real_initialize(self):
	455	self._initialize_pref()
	456	self._initialize_consent()
	457	self._check_login_required()
	458
	459	def _check_login_required(self):
	460	if self._LOGIN_REQUIRED and not self._cookies_passed:
	461	self.raise_login_required('Login details are needed to download this content', method='cookies')
	462
	463	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	464	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	465
	466	def _get_default_ytcfg(self, client='web'):
	467	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	468
	469	def _get_innertube_host(self, client='web'):
	470	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	471
	472	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	473	# try_get but with fallback to default ytcfg client values when present
	474	_func = lambda y: try_get(y, getter, expected_type)
	475	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	476
	477	def _extract_client_name(self, ytcfg, default_client='web'):
	478	return self._ytcfg_get_safe(
	479	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	480	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
	481
	482	def _extract_client_version(self, ytcfg, default_client='web'):
	483	return self._ytcfg_get_safe(
	484	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	485	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
	486
	487	def _select_api_hostname(self, req_api_hostname, default_client=None):
	488	return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
	489	or req_api_hostname or self._get_innertube_host(default_client or 'web'))
	490
	491	def _extract_api_key(self, ytcfg=None, default_client='web'):
	492	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
	493
	494	def _extract_context(self, ytcfg=None, default_client='web'):
	495	context = get_first(
	496	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	497	# Enforce language and tz for extraction
	498	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	499	client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	500	return context

1

import base64

import calendar

import collections

import copy

import datetime

import enum

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

22

from .openload import PhantomJSwrapper

23

from ..compat import functools

24

from ..jsinterp import JSInterpreter

25

from ..utils import (

NO_DEFAULT,

ExtractorError,

LazyList,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

filter_dict,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

70

INNERTUBE_CLIENTS = {

71

'web': {

72

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

73

'INNERTUBE_CONTEXT': {

74

'client': {

75

'clientName': 'WEB',

76

'clientVersion': '2.20220801.00.00',

77

}

78

},

79

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

80

},

81

'web_embedded': {

82

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

83

'INNERTUBE_CONTEXT': {

84

'client': {

85

'clientName': 'WEB_EMBEDDED_PLAYER',

86

'clientVersion': '1.20220731.00.00',

87

},

88

},

89

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

90

},

91

'web_music': {

92

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

93

'INNERTUBE_HOST': 'music.youtube.com',

94

'INNERTUBE_CONTEXT': {

95

'client': {

96

'clientName': 'WEB_REMIX',

97

'clientVersion': '1.20220727.01.00',

98

}

99

},

100

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

101

},

102

'web_creator': {

103

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

104

'INNERTUBE_CONTEXT': {

105

'client': {

106

'clientName': 'WEB_CREATOR',

107

'clientVersion': '1.20220726.00.00',

108

}

109

},

110

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

111

},

112

'android': {

113

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

114

'INNERTUBE_CONTEXT': {

115

'client': {

116

'clientName': 'ANDROID',

117

'clientVersion': '17.31.35',

118

'androidSdkVersion': 30,

119

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

120

}

121

},

122

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

123

'REQUIRE_JS_PLAYER': False

124

},

125

'android_embedded': {

126

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

127

'INNERTUBE_CONTEXT': {

128

'client': {

129

'clientName': 'ANDROID_EMBEDDED_PLAYER',

130

'clientVersion': '17.31.35',

131

'androidSdkVersion': 30,

132

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

133

},

134

},

135

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

136

'REQUIRE_JS_PLAYER': False

137

},

138

'android_music': {

139

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

140

'INNERTUBE_CONTEXT': {

141

'client': {

142

'clientName': 'ANDROID_MUSIC',

143

'clientVersion': '5.16.51',

144

'androidSdkVersion': 30,

145

'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'

146

}

147

},

148

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

149

'REQUIRE_JS_PLAYER': False

150

},

151

'android_creator': {

152

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

153

'INNERTUBE_CONTEXT': {

154

'client': {

155

'clientName': 'ANDROID_CREATOR',

156

'clientVersion': '22.30.100',

157

'androidSdkVersion': 30,

158

'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'

159

},

160

},

161

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

162

'REQUIRE_JS_PLAYER': False

163

},

164

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

165

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

166

'ios': {

167

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

168

'INNERTUBE_CONTEXT': {

169

'client': {

170

'clientName': 'IOS',

171

'clientVersion': '17.33.2',

172

'deviceModel': 'iPhone14,3',

173

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

174

}

175

},

176

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

177

'REQUIRE_JS_PLAYER': False

178

},

179

'ios_embedded': {

180

'INNERTUBE_CONTEXT': {

181

'client': {

182

'clientName': 'IOS_MESSAGES_EXTENSION',

183

'clientVersion': '17.33.2',

184

'deviceModel': 'iPhone14,3',

185

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

186

},

187

},

188

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

189

'REQUIRE_JS_PLAYER': False

190

},

191

'ios_music': {

192

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

193

'INNERTUBE_CONTEXT': {

194

'client': {

195

'clientName': 'IOS_MUSIC',

196

'clientVersion': '5.21',

197

'deviceModel': 'iPhone14,3',

198

'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

199

},

200

},

201

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

202

'REQUIRE_JS_PLAYER': False

203

},

204

'ios_creator': {

205

'INNERTUBE_CONTEXT': {

206

'client': {

207

'clientName': 'IOS_CREATOR',

208

'clientVersion': '22.33.101',

209

'deviceModel': 'iPhone14,3',

210

'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

211

},

212

},

213

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

214

'REQUIRE_JS_PLAYER': False

215

},

216

# mweb has 'ultralow' formats

217

# See: https://github.com/yt-dlp/yt-dlp/pull/557

218

'mweb': {

219

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

220

'INNERTUBE_CONTEXT': {

221

'client': {

222

'clientName': 'MWEB',

223

'clientVersion': '2.20220801.00.00',

224

}

225

},

226

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

227

},

228

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

229

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

230

'tv_embedded': {

231

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

232

'INNERTUBE_CONTEXT': {

233

'client': {

234

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

235

'clientVersion': '2.0',

236

},

237

},

238

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

244

variant, *base = client_name.rsplit('.', 1)

245

if base:

246

return variant, base[0], variant

247

base, *variant = client_name.split('_', 1)

248

return client_name, base, variant[0] if variant else None

249

250

251

def build_innertube_clients():
    """Finalise the module-level INNERTUBE_CLIENTS table in place.

    For every client already in the table this:

    * fills in INNERTUBE_API_KEY, INNERTUBE_HOST and REQUIRE_JS_PLAYER when
      they are absent, and defaults the client context language (``hl``) to 'en';
    * stores a numeric ``priority``: ten times the rank of the client's base
      name, minus 2 for the ``embedded`` variant or minus 3 for any other variant;
    * for clients without a variant, registers an extra ``<name>_embedscreen``
      entry: a deep copy with clientScreen 'EMBED', a thirdParty embed URL and
      a priority lowered by 3.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    # NOTE(review): presumably names listed first rank highest - confirm against utils.qualities
    rank_of = qualities(base_clients[::-1])
    fallbacks = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: the loop body adds new keys to INNERTUBE_CLIENTS
    for name, config in list(INNERTUBE_CLIENTS.items()):
        for key, value in fallbacks:
            config.setdefault(key, value)
        config['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_name, variant = _split_innertube_client(name)[1:]
        config['priority'] = 10 * rank_of(base_name)

        if variant == 'embedded':
            config['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            config['priority'] -= 2
        elif variant:
            config['priority'] -= 3
        else:
            screen_config = copy.deepcopy(config)
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_config
            screen_config['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_config['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_config['priority'] -= 3

277

278

279

build_innertube_clients()

280

281

282

class BadgeType(enum.Enum):
    """Badge categories: five availability states plus a live-now marker.

    Members carry the consecutive integers 1-6 in declaration order, which
    are the values ``enum.auto()`` would assign.
    """

    AVAILABILITY_UNLISTED = 1
    AVAILABILITY_PRIVATE = 2
    AVAILABILITY_PUBLIC = 3
    AVAILABILITY_PREMIUM = 4
    AVAILABILITY_SUBSCRIPTION = 5
    LIVE_NOW = 6

289

290

291

class YoutubeBaseInfoExtractor(InfoExtractor):

292

"""Provide base functions for Youtube extractors"""

_RESERVED_NAMES = (

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

301

302

# _NETRC_MACHINE = 'youtube'

303

304

# If True it will raise an error if no login info is provided

305

_LOGIN_REQUIRED = False

306

307

_INVIDIOUS_SITES = (

308

# invidious-redirect websites

309

r'(?:www\.)?redirect\.invidious\.io',

310

r'(?:(?:www|dev)\.)?invidio\.us',

311

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

312

r'(?:www\.)?invidious\.pussthecat\.org',

313

r'(?:www\.)?invidious\.zee\.li',

314

r'(?:www\.)?invidious\.ethibox\.fr',

315

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

316

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

317

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

318

# youtube-dl invidious instances list

319

r'(?:(?:www|no)\.)?invidiou\.sh',

320

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

321

r'(?:www\.)?invidious\.kabi\.tk',

322

r'(?:www\.)?invidious\.mastodon\.host',

323

r'(?:www\.)?invidious\.zapashcanon\.fr',

324

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

325

r'(?:www\.)?invidious\.tinfoil-hat\.net',

326

r'(?:www\.)?invidious\.himiko\.cloud',

327

r'(?:www\.)?invidious\.reallyancient\.tech',

328

r'(?:www\.)?invidious\.tube',

329

r'(?:www\.)?invidiou\.site',

330

r'(?:www\.)?invidious\.site',

331

r'(?:www\.)?invidious\.xyz',

332

r'(?:www\.)?invidious\.nixnet\.xyz',

333

r'(?:www\.)?invidious\.048596\.xyz',

334

r'(?:www\.)?invidious\.drycat\.fr',

335

r'(?:www\.)?inv\.skyn3t\.in',

336

r'(?:www\.)?tube\.poal\.co',

337

r'(?:www\.)?tube\.connect\.cafe',

338

r'(?:www\.)?vid\.wxzm\.sx',

339

r'(?:www\.)?vid\.mint\.lgbt',

340

r'(?:www\.)?vid\.puffyan\.us',

341

r'(?:www\.)?yewtu\.be',

342

r'(?:www\.)?yt\.elukerio\.org',

343

r'(?:www\.)?yt\.lelux\.fi',

344

r'(?:www\.)?invidious\.ggc-project\.de',

345

r'(?:www\.)?yt\.maisputain\.ovh',

346

r'(?:www\.)?ytprivate\.com',

347

r'(?:www\.)?invidious\.13ad\.de',

348

r'(?:www\.)?invidious\.toot\.koeln',

349

r'(?:www\.)?invidious\.fdn\.fr',

350

r'(?:www\.)?watch\.nettohikari\.com',

351

r'(?:www\.)?invidious\.namazso\.eu',

352

r'(?:www\.)?invidious\.silkky\.cloud',

353

r'(?:www\.)?invidious\.exonip\.de',

354

r'(?:www\.)?invidious\.riverside\.rocks',

355

r'(?:www\.)?invidious\.blamefran\.net',

356

r'(?:www\.)?invidious\.moomoo\.de',

357

r'(?:www\.)?ytb\.trom\.tf',

358

r'(?:www\.)?yt\.cyberhost\.uk',

359

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

360

r'(?:www\.)?qklhadlycap4cnod\.onion',

361

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

362

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

363

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

364

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

365

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

366

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

367

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

368

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

369

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

370

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

371

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

372

r'(?:www\.)?piped\.kavin\.rocks',

373

r'(?:www\.)?piped\.tokhmi\.xyz',

374

r'(?:www\.)?piped\.syncpundit\.io',

375

r'(?:www\.)?piped\.mha\.fi',

376

r'(?:www\.)?watch\.whatever\.social',

377

r'(?:www\.)?piped\.garudalinux\.org',

378

r'(?:www\.)?piped\.rivo\.lol',

379

r'(?:www\.)?piped-libre\.kavin\.rocks',

380

r'(?:www\.)?yt\.jae\.fi',

381

r'(?:www\.)?piped\.mint\.lgbt',

382

r'(?:www\.)?il\.ax',

383

r'(?:www\.)?piped\.esmailelbob\.xyz',

384

r'(?:www\.)?piped\.projectsegfau\.lt',

385

r'(?:www\.)?piped\.privacydev\.net',

386

r'(?:www\.)?piped\.palveluntarjoaja\.eu',

387

r'(?:www\.)?piped\.smnz\.de',

388

r'(?:www\.)?piped\.adminforge\.de',

389

r'(?:www\.)?watch\.whatevertinfoil\.de',

390

r'(?:www\.)?piped\.qdi\.fi',

391

r'(?:www\.)?piped\.video',

392

r'(?:www\.)?piped\.aeong\.one',

393

)

394

395

# extracted from account/account_menu ep

396

# XXX: These are the supported YouTube UI and API languages,

397

# which is slightly different from languages supported for translation in YouTube studio

398

_SUPPORTED_LANG_CODES = [

399

'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',

400

'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',

401

'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',

402

'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',

403

'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',

404

'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'

405

]

406

407

_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}

408

409

@functools.cached_property
def _preferred_lang(self):
    """
    Returns a language code supported by YouTube for the user preferred language.
    Returns None if no preferred language set.
    Raises ExtractorError if the requested code is not in _SUPPORTED_LANG_CODES.
    """
    lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not lang:
        return None

    if lang not in self._SUPPORTED_LANG_CODES:
        supported = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")
        raise ExtractorError(
            f'Unsupported language code: {lang}. Supported language codes (case-sensitive): {supported}.',
            expected=True)

    # Anything other than English gets a heads-up about possibly wrong metadata
    if lang != 'en':
        self.report_warning(
            f'Preferring "{lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return lang

426

427

def _initialize_consent(self):
    """
    Set an accepting CONSENT cookie for .youtube.com.

    Nothing is done when a __Secure-3PSID cookie exists or when the current
    CONSENT cookie already contains 'YES'.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        # Reuse the id of a pending consent when there is one
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

441

442

def _initialize_pref(self):
    """
    Rewrite the PREF cookie so that it carries the preferred language
    (falling back to 'en') and the UTC timezone, keeping the user's other
    PREF values when the existing cookie can be parsed.
    """
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')

    settings = {}
    if existing:
        try:
            settings = dict(urllib.parse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    settings['hl'] = self._preferred_lang or 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))

453

454

def _real_initialize(self):
    """Prepare the PREF and CONSENT cookies, then enforce the login requirement"""
    for step in (self._initialize_pref, self._initialize_consent, self._check_login_required):
        step()

458

459

def _check_login_required(self):
    """Call raise_login_required when _LOGIN_REQUIRED is set and no cookies were passed"""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')

462

463

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

464

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

465

466

def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the INNERTUBE_CLIENTS entry for `client`"""
    client_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(client_config)

468

469

def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the INNERTUBE_CLIENTS entry for `client`"""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']

471

472

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get but with fallback to default ytcfg client values when present

    @param ytcfg:          ytcfg to read from first
    @param getter:         getter (or getters) handed to try_get
    @param expected_type:  type filter handed to try_get
    @param default_client: client whose default ytcfg is used when the
                           lookup on `ytcfg` gives a falsy result
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

476

477

def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from `ytcfg`, falling back to the default ytcfg of `default_client`"""
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)

481

482

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from `ytcfg`, falling back to the default ytcfg of `default_client`"""
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)

486

487

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname: the `innertube_host` extractor argument wins, then
    `req_api_hostname`, then the host of `default_client` ('web' if unset).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

490

491

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, falling back to the default ytcfg of `default_client`"""
    def api_key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_getter, str, default_client)

493

494

def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict taken from `ytcfg`, or from the default
    ytcfg of `default_client`, with language and timezone overridden.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context['hl'] = self._preferred_lang or 'en'
    client_context['timeZone'] = 'UTC'
    client_context['utcOffsetMinutes'] = 0
    return context

# None: cookies not looked up yet; False: looked up, no usable cookie found
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for `origin`.

    The SAPISID value is read from the cookies on first use and remembered
    in self._SAPISID. Returns None when no SAPISID value is available.
    """
    time_now = round(time.time())

    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        sapisid_cookie = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (sapisid_cookie and sapisid_cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = sapisid_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    hash_input = f'{time_now} {self._SAPISID} {origin}'.encode()
    sapisidhash = hashlib.sha1(hash_input).hexdigest()
    return f'SAPISIDHASH {time_now}_{sapisidhash}'

528

529

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged with the innertube context) as JSON to the
    /youtubei/v1/<ep> endpoint and return the result of _download_json.

    @param ep:             endpoint name appended to /youtubei/v1/
    @param query:          mapping merged on top of {'context': ...}
    @param headers:        extra headers applied over the generated API headers
    @param context:        innertube context; extracted for `default_client` when falsy
    @param api_key:        API key, overridden by the `innertube_key` extractor argument
    @param api_hostname:   hostname handed to _select_api_hostname
    @param default_client: client used for every default lookup
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})

546

547

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON that follows _YT_INITIAL_DATA_RE and return it"""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)

549

550

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts
    to an int, or None when there is none.
    """
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in candidates if index is not None), None)

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the 'ID_TOKEN' from `ytcfg`, else the one found in `webpage`, else None"""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token

    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

571

572

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(source, datasync_getters, str) or ''
        id_parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(id_parts) >= 2 and id_parts[1]:
            return id_parts[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)

600

601

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization value can be generated"""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

604

605

def extract_ytcfg(self, video_id, webpage):
    """
    Extract the object passed to `ytcfg.set(...)` in `webpage`.

    @param video_id: id used by _parse_json for reporting
    @param webpage:  page source; a falsy value short-circuits to {}
    @returns         the parsed ytcfg dict, or {} when the call is not found
                     or its argument cannot be parsed
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped literals; an unescaped `$`
    # in their place can never be followed by `{...}` and so never matches
    ytcfg_json = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    return self._parse_json(ytcfg_json, video_id, fatal=False) or {}

612

613

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an innertube API request.

    Values given explicitly win; everything else is read from `ytcfg` with
    the usual fallback to the default ytcfg of `default_client`. The
    Authorization/X-Origin pair is added only when a SAPISIDHASH value can
    be generated. The result is passed through filter_dict.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # An explicit account sync id without a known session index uses index 0
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return filter_dict(headers)

638

639

def _download_ytcfg(self, client, video_id):
    """
    Download the page belonging to `client` and return the ytcfg found in it.

    Returns {} for clients without a known page and when nothing could be extracted.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
    }
    page_url = config_pages.get(client)
    if not page_url:
        return {}

    note = f'Downloading {client.replace("_", " ").strip()} client config'
    webpage = self._download_webpage(page_url, video_id, fatal=False, note=note)
    return self.extract_ytcfg(video_id, webpage) or {}

650

651

@staticmethod

652

def _build_api_continuation_query(continuation, ctp=None):

653

query = {

654

'continuation': continuation

655

}

656

# TODO: Inconsistency with clickTrackingParams.

657

# Currently we have a fixed ctp contained within context (from ytcfg)

658

# and a ctp in root query for continuation.

659

if ctp:

660

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData or
    reloadContinuationData. Returns None when no continuation token is found.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict) or {}
    token = continuation_data.get('continuation')
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

675

676

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.
    Returns None for non-dict input or when the endpoint carries no token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

685

686

@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`: the next/reload continuation
    data when present, otherwise the first usable continuationItemRenderer endpoint.
    """
    continuation_item_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, continuation_item_path,
        get_all=False, expected_type=cls._extract_continuation_ep_data)

696

697

@classmethod
def _extract_alerts(cls, data):
    """
    Yield (type, message) pairs for the entries of data['alerts'] that have
    both a type and a non-empty text.
    """
    alert_dicts = try_get(data, lambda x: x['alerts'], list) or []
    for alert_dict in alert_dicts:
        if isinstance(alert_dict, dict):
            for alert in alert_dict.values():
                alert_type = alert.get('type')
                if alert_type:
                    message = cls._get_text(alert, 'text')
                    if message:
                        yield alert_type, message

709

710

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report alerts as warnings and raise on the last error.

    @param alerts:    iterable of (type, message) pairs
    @param expected:  `expected` flag of the raised ExtractorError
    @param fatal:     when false, 'error' alerts are handled like any other alert
    @param only_once: passed on to report_warning
    """
    errors = []
    warnings = []
    for alert_type, alert_message in alerts:
        is_error = alert_type.lower() == 'error' and fatal
        if is_error:
            errors.append((alert_type, alert_message))
        elif alert_message not in self._IGNORED_WARNINGS:
            warnings.append((alert_type, alert_message))

    # Every error except the last one is only warned about
    for alert_type, alert_message in [*warnings, *errors[:-1]]:
        self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

    if not errors:
        return
    last_error_message = errors[-1][1]
    raise ExtractorError('YouTube said: %s' % last_error_message, expected=expected)

722

723

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and hand them to _report_alerts with the remaining arguments"""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

725

726

def _extract_badges(self, renderer: dict):
    """
    Collect the badges attached to `renderer`.

    Every `metadataBadgeRenderer` under 'badges' is mapped to a BadgeType by
    its icon type, then by its style, and as a last resort by looking for
    known English words in its label.

    @param renderer: renderer dict that may contain a 'badges' list
    @returns         list of {'type': BadgeType} dicts, at most one per badge
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # `badge_type` is falsy on this path; the matched label's type is
                # the one to record, and the first match settles this badge
                badges.append({'type': label_badge_type})
                break

    return badges

@staticmethod
def _has_badge(badges, badge_type):
    """True when traversing `badges` finds an entry whose 'type' equals `badge_type`"""
    def is_wanted(_, badge):
        return badge['type'] == badge_type

    return bool(traverse_obj(badges, is_wanted))

769

770

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths.

    A text object is read from its 'simpleText', otherwise from the joined
    'text' of its 'runs' (a bare list is treated as the runs themselves).

    @param data:      object to search; used directly when no path is given
    @param path_list: traverse_obj paths tried in order
    @param max_runs:  maximum number of runs joined; all of them when falsy
    @returns          the text, or None when nothing was found
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                # A non-branching path gives a single object rather than a list of them
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            run_limit = min(len(runs), max_runs or len(runs))
            joined_text = ''.join(
                traverse_obj(runs[:run_limit], (..., 'text'), expected_type=str, default=[]))
            if joined_text:
                return joined_text

def _get_count(self, data, *path_list):
    """
    Read the text at `path_list` (see _get_text) and parse it as a count.

    Falls back to the leading run of digits and commas, with all whitespace
    removed first, when parse_count gives None.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count

    compact_text = re.sub(r'\s', '', count_text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_digits)

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.partition('?')[0]
            results.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    @param relative_time_text: text to search
    @returns the result of datetime_from_str for the matched expression, or
             None when nothing matches or the conversion raises ValueError
    """
    # NOTE(review): the search itself was missing from this block, leaving `mobj`
    # undefined; the pattern is restored from the 'start'/'time'/'unit' groups
    # read below - confirm it against the project history
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None

    start = mobj.group('start')
    if start:
        return datetime_from_str(start)
    try:
        return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
    except ValueError:
        return None

def _parse_time_text(self, text):
    """
    Convert a time text into a timestamp.

    Relative expressions are tried first (extract_relative_time), then
    unified_timestamp on the whole text, then on a date-like part of it.
    A warning is reported when nothing works and the preferred language is
    unset or 'en'.

    @returns the timestamp, or None when `text` is empty or cannot be parsed
    """
    if not text:
        return

    timestamp = None
    relative_dt = self.extract_relative_time(text)
    if isinstance(relative_dt, datetime.datetime):
        timestamp = calendar.timegm(relative_dt.timetuple())

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_text = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_text)

    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp

859

860

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` under the retry manager and return its response.

    Network errors, HTTP errors other than 403/429, 'unknown error' alerts and
    responses lacking `check_get_keys` are retried; any other failure goes to
    _error_or_warning with `fatal`.

    @param check_get_keys: traverse_obj path(s) that must yield something in a
                           complete response
    """
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            cause = e.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            if not isinstance(cause, urllib.error.HTTPError):
                retry.error = e
                continue

            first_bytes = cause.read(512)
            if not is_html(first_bytes):
                error_body = self._webpage_read_content(cause, None, item_id, prefix=first_bytes) or '{}'
                yt_error = try_get(
                    self._parse_json(error_body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.code in (403, 429):
                return self._error_or_warning(e, fatal=fatal)
            retry.error = e
            continue

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' not in e.msg.lower():
                return self._error_or_warning(e, fatal=fatal)
            retry.error = e
            continue

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response

@staticmethod

def is_music_url(url):

914

return re.match(r'https?://music\.youtube\.com/', url) is not None

915

916

def _extract_video(self, renderer):
    """
    Build a 'url'-type result for one video renderer.

    @param renderer: video renderer dict; the reel header nested under its
                     navigationEndpoint is used as fallback for some fields
    @returns         dict pointing at the video's watch (or shorts) URL with
                     ie_key set to YoutubeIE, plus the metadata found in the renderer
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        length_text = self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
        duration = parse_duration(length_text)
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not channel_id:
        channel_id = traverse_obj(reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    if live_status in ('is_live', 'is_upcoming'):
        view_count_field = 'concurrent_view_count'
    else:
        view_count_field = 'view_count'

    channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
               or self._get_text(reel_header, 'channelTitleText'))

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        # The timestamp is only computed when the approximate_date argument is given
        'timestamp': (self._parse_time_text(time_text)
                      if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
                      else None),
        'release_timestamp': scheduled_timestamp,
        'availability':
            'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            else self._availability(
                is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
                needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
                needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
                is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
        view_count_field: view_count,
        'live_status': live_status
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

1001

IE_DESC = 'YouTube'

1002

_VALID_URL = r"""(?x)^

1003

(

1004

(?:https?://|//) # http(s):// or protocol-independent URL

1005

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

1006

(?:www\.)?deturl\.com/www\.youtube\.com|

1007

(?:www\.)?pwnyoutube\.com|

1008

(?:www\.)?hooktube\.com|

1009

(?:www\.)?yourepeat\.com|

1010

tube\.majestyc\.net|

1011

%(invidious)s|

1012

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

1013

(?:.*?\#/)? # handle anchor (#/) redirect urls

1014

(?: # the various things that can precede the ID:

1015

1016

|(?: # or the v= param in all its forms

1017

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

1018

(?:\?|\#!?) # the params delimiter ? or # or #!

1019

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

1025

vid\.plus| # or vid.plus/xxxx

1026

zwearz\.com/watch| # or zwearz.com/watch/xxxx

1027

%(invidious)s

1028

)/

1029

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

1030

)

1031

)? # all until now is optional -> you can pass the naked ID

1032

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

1033

(?(1).+)? # if we found the ID, everything can follow

1034

(?:\#|$)""" % {

1035

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

}

_EMBED_REGEX = [

r'''(?x)

(?:

<(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|

data-video-url=|

<embed[^>]+?src=|

embedSWF\(?:\s*|

<object[^>]+data=|

new\s+SWFObject\(

)

(["\'])

(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/

1049

(?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)

1050

\1''',

1051

# https://wordpress.org/plugins/lazy-load-for-videos/

1052

r'''(?xs)

1053

<a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"

1054

\s[^>]*\bclass="[^"]*\blazy-load-youtube''',

1055

]

1056

# Result kind declared for this extractor.
# NOTE(review): how this value is consumed is not visible in this part of the file — confirm against the base class.
_RETURN_TYPE = 'video' # XXX: How to handle multifeed?

1057

1058

_PLAYER_INFO_RE = (

1059

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

1060

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

1061

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

1062

)

1063

_formats = {

1064

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

1065

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

1066

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

1067

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

1068

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

1069

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

1070

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

1071

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

1072

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

1073

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

1074

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

1075

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

1076

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

1077

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

1078

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

1079

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

1080

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

1081

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

1086

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

1087

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

1088

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

1089

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

1090

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

1091

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

1092

1093

# Apple HTTP Live Streaming

1094

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

1095

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

1096

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

1097

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

1098

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

1099

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

1100

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

1101

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

1102

1103

# DASH mp4 video

1104

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

1105

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

1106

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

1107

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

1108

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

1109

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

1110

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

1111

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

1112

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

1113

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

1114

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

1115

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

1116

1117

# Dash mp4 audio

1118

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

1119

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

1120

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

1121

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1122

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

1123

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

1124

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

1125

1126

# Dash webm

1127

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1128

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1129

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1130

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1131

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1132

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1133

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1134

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1135

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1136

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1137

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1138

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1139

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1140

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1141

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1142

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1143

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1144

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1145

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1146

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1147

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1148

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1149

1150

# Dash webm audio

1151

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1152

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1153

1154

# Dash webm audio with opus inside

1155

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1156

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1157

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1158

1159

# RTMP (unnamed)

1160

'_rtmp': {'protocol': 'rtmp'},

1161

1162

# av01 video only formats sometimes served with "unknown" codecs

1163

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1164

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1165

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1166

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1167

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1168

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1169

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1170

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1171

}

1172

# Subtitle format names, as a tuple of strings.
# NOTE(review): presumably the formats requested for each subtitle track — confirm where this is consumed.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# NOTE(review): presumably opts this extractor out of the base class's geo-bypass handling — confirm against InfoExtractor.
_GEO_BYPASS = False

# Name string for this extractor class.
IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1184

'uploader': 'Philipp Hagemeister',

1185

'uploader_id': 'phihag',

1186

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1187

'channel': 'Philipp Hagemeister',

1188

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1189

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1190

'upload_date': '20121002',

1191

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1192

'categories': ['Science & Technology'],

1193

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1198

'playable_in_embed': True,

1199

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1200

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1205

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1210

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1215

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1216

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1217

'uploader': 'SET India',

1218

'uploader_id': 'setindia',

1219

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1220

'age_limit': 18,

1221

},

1222

'skip': 'Private video',

1223

},

1224

{

1225

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1226

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1231

'uploader': 'Philipp Hagemeister',

1232

'uploader_id': 'phihag',

1233

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1234

'channel': 'Philipp Hagemeister',

1235

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1236

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1237

'upload_date': '20121002',

1238

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1239

'categories': ['Science & Technology'],

1240

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1245

'playable_in_embed': True,

1246

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1247

'live_status': 'not_live',

1248

'age_limit': 0,

1249

'comment_count': int,

1250

'channel_follower_count': int

1251

},

1252

'params': {

1253

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1258

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1263

'uploader_id': '8KVIDEO',

1264

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1265

'description': '',

1266

'uploader': '8KVIDEO',

1267

'title': 'UHDTV TEST 8K VIDEO.mp4'

1268

},

1269

'params': {

1270

'youtube_include_dash_manifest': True,

1271

'format': '141',

1272

},

1273

'skip': 'format 141 not served anymore',

1274

},

1275

# DASH manifest with encrypted signature

1276

{

1277

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1282

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1283

'duration': 244,

1284

'uploader': 'AfrojackVEVO',

1285

'uploader_id': 'AfrojackVEVO',

1286

'upload_date': '20131011',

1287

'abr': 129.495,

1288

'like_count': int,

1289

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1290

'playable_in_embed': True,

1291

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1292

'view_count': int,

1293

'track': 'The Spark',

1294

'live_status': 'not_live',

1295

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1296

'channel': 'Afrojack',

1297

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1298

'tags': 'count:19',

1299

'availability': 'public',

1300

'categories': ['Music'],

1301

'age_limit': 0,

1302

'alt_title': 'The Spark',

1303

'channel_follower_count': int

1304

},

1305

'params': {

1306

'youtube_include_dash_manifest': True,

1307

'format': '141/bestaudio[ext=m4a]',

1308

},

1309

},

1310

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1311

{

1312

'note': 'Embed allowed age-gate video',

1313

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1318

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1319

'duration': 142,

1320

'uploader': 'The Witcher',

1321

'uploader_id': 'WitcherGame',

1322

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1323

'upload_date': '20140605',

1324

'age_limit': 18,

1325

'categories': ['Gaming'],

1326

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1327

'availability': 'needs_auth',

1328

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1329

'like_count': int,

1330

'channel': 'The Witcher',

1331

'live_status': 'not_live',

1332

'tags': 'count:17',

1333

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1334

'playable_in_embed': True,

1335

'view_count': int,

1336

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1341

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1346

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1347

'upload_date': '20200408',

1348

'uploader_id': 'FlyingKitty900',

1349

'uploader': 'FlyingKitty',

1350

'age_limit': 18,

1351

'availability': 'needs_auth',

1352

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1353

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1354

'channel': 'FlyingKitty',

1355

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1356

'view_count': int,

1357

'categories': ['Entertainment'],

1358

'live_status': 'not_live',

1359

'tags': ['Flyingkitty', 'godzilla 2'],

1360

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1361

'like_count': int,

1362

'duration': 177,

1363

'playable_in_embed': True,

1364

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1369

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1370

'info_dict': {

1371

'id': 'Tq92D6wQ1mg',

1372

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1373

'ext': 'mp4',

1374

'upload_date': '20191228',

1375

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1376

'uploader': 'Projekt Melody',

1377

'description': 'md5:17eccca93a786d51bc67646756894066',

1378

'age_limit': 18,

1379

'like_count': int,

1380

'availability': 'needs_auth',

1381

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1382

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1383

'view_count': int,

1384

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1385

'channel': 'Projekt Melody',

1386

'live_status': 'not_live',

1387

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1388

'playable_in_embed': True,

1389

'categories': ['Entertainment'],

1390

'duration': 106,

1391

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1392

'comment_count': int,

1393

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1398

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1403

'uploader': 'Herr Lurik',

1404

'uploader_id': 'st3in234',

1405

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1406

'upload_date': '20130730',

1407

'track': 'Such mich find mich',

1408

'age_limit': 0,

1409

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1410

'like_count': int,

1411

'playable_in_embed': False,

1412

'creator': 'OOMPH!',

1413

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1414

'view_count': int,

1415

'alt_title': 'Such mich find mich',

1416

'duration': 210,

1417

'channel': 'Herr Lurik',

1418

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1419

'categories': ['Music'],

1420

'availability': 'public',

1421

'uploader_url': 'http://www.youtube.com/user/st3in234',

1422

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1423

'live_status': 'not_live',

1424

'artist': 'OOMPH!',

1425

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1430

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1431

'only_matching': True,

1432

},

1433

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1434

# YouTube Red ad is not captured for creator

1435

{

1436

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1442

'uploader_id': 'deadmau5',

1443

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1444

'creator': 'deadmau5',

1445

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1446

'uploader': 'deadmau5',

1447

'title': 'Deadmau5 - Some Chords (HD)',

1448

'alt_title': 'Some Chords',

1449

'availability': 'public',

1450

'tags': 'count:14',

1451

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1452

'view_count': int,

1453

'live_status': 'not_live',

1454

'channel': 'deadmau5',

1455

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1456

'like_count': int,

1457

'track': 'Some Chords',

1458

'artist': 'deadmau5',

1459

'playable_in_embed': True,

1460

'age_limit': 0,

1461

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1462

'categories': ['Music'],

1463

'album': 'Some Chords',

1464

'channel_follower_count': int

1465

},

1466

'expected_warnings': [

1467

'DASH manifest missing',

1468

]

1469

},

1470

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1471

{

1472

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1478

'uploader_id': 'olympic',

1479

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1480

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1481

'uploader': 'Olympics',

1482

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1483

'like_count': int,

1484

'release_timestamp': 1343767800,

1485

'playable_in_embed': True,

1486

'categories': ['Sports'],

1487

'release_date': '20120731',

1488

'channel': 'Olympics',

1489

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1490

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1491

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1492

'age_limit': 0,

1493

'availability': 'public',

1494

'live_status': 'was_live',

1495

'view_count': int,

1496

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1497

'channel_follower_count': int

1498

},

1499

'params': {

1500

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1510

'duration': 85,

1511

'upload_date': '20110310',

1512

'uploader_id': 'AllenMeow',

1513

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1514

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1515

'uploader': '孫ᄋᄅ',

1516

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1517

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1522

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1523

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1524

'view_count': int,

1525

'categories': ['People & Blogs'],

1526

'like_count': int,

1527

'live_status': 'not_live',

1528

'availability': 'unlisted',

1529

'comment_count': int,

1530

'channel_follower_count': int

1531

},

1532

},

1533

# url_encoded_fmt_stream_map is empty string

1534

{

1535

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1540

'description': '',

1541

'upload_date': '20150404',

1542

'uploader_id': 'spbelect',

1543

'uploader': 'Наблюдатели Петербурга',

1544

},

1545

'params': {

1546

'skip_download': 'requires avconv',

1547

},

1548

'skip': 'This live event has ended.',

1549

},

1550

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1551

{

1552

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1557

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1558

'duration': 220,

1559

'upload_date': '20150625',

1560

'uploader_id': 'dorappi2000',

1561

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1562

'uploader': 'dorappi2000',

1563

'formats': 'mincount:31',

1564

},

1565

'skip': 'not actual anymore',

1566

},

1567

# DASH manifest with segment_list

1568

{

1569

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1570

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1575

'uploader': 'Airtek',

1576

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1577

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1578

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1579

},

1580

'params': {

1581

'youtube_include_dash_manifest': True,

1582

'format': '135', # bestvideo

1583

},

1584

'skip': 'This live event has ended.',

1585

},

1586

{

1587

# Multifeed videos (multiple cameras), URL can be of any Camera

1588

'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',

1589

'info_dict': {

1590

'id': 'zaPI8MvL8pg',

1591

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',

1592

'description': 'md5:563ccbc698b39298481ca3c571169519',

},

'playlist': [{

'info_dict': {

'id': 'j5yGuxZ8lLU',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',

1599

'uploader': 'WiiLikeToPlay',

1600

'description': 'md5:563ccbc698b39298481ca3c571169519',

1601

'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',

1602

'duration': 10120,

1603

'channel_follower_count': int,

1604

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1605

'availability': 'public',

1606

'playable_in_embed': True,

1607

'upload_date': '20131105',

1608

'uploader_id': 'WiiRikeToPray',

1609

'categories': ['Gaming'],

1610

'live_status': 'was_live',

1611

'tags': 'count:24',

1612

'release_timestamp': 1383701910,

1613

'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',

1614

'comment_count': int,

1615

'age_limit': 0,

1616

'like_count': int,

1617

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1618

'channel': 'WiiLikeToPlay',

1619

'view_count': int,

1620

'release_date': '20131106',

},

}, {

'info_dict': {

'id': 'zaPI8MvL8pg',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',

1627

'uploader_id': 'WiiRikeToPray',

1628

'availability': 'public',

1629

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1630

'channel': 'WiiLikeToPlay',

1631

'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',

1632

'channel_follower_count': int,

1633

'description': 'md5:563ccbc698b39298481ca3c571169519',

'duration': 10108,

'age_limit': 0,

'like_count': int,

'tags': 'count:24',

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1639

'uploader': 'WiiLikeToPlay',

1640

'release_timestamp': 1383701915,

1641

'comment_count': int,

1642

'upload_date': '20131105',

1643

'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',

1644

'release_date': '20131106',

1645

'playable_in_embed': True,

1646

'live_status': 'was_live',

1647

'categories': ['Gaming'],

'view_count': int,

},

}, {

'info_dict': {

'id': 'R7r3vfO7Hao',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',

1655

'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',

1656

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1657

'like_count': int,

1658

'availability': 'public',

1659

'playable_in_embed': True,

1660

'upload_date': '20131105',

1661

'description': 'md5:563ccbc698b39298481ca3c571169519',

1662

'uploader_id': 'WiiRikeToPray',

1663

'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',

1664

'channel_follower_count': int,

1665

'tags': 'count:24',

1666

'release_date': '20131106',

1667

'uploader': 'WiiLikeToPlay',

1668

'comment_count': int,

1669

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1670

'channel': 'WiiLikeToPlay',

1671

'categories': ['Gaming'],

1672

'release_timestamp': 1383701914,

1673

'live_status': 'was_live',

'age_limit': 0,

'duration': 10128,

'view_count': int,

},

}],

'params': {'skip_download': True},

1680

},

1681

{

1682

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1683

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1684

'info_dict': {

1685

'id': 'gVfLd0zydlo',

1686

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1687

},

1688

'playlist_count': 2,

1689

'skip': 'Not multifeed anymore',

1690

},

1691

{

1692

'url': 'https://vid.plus/FlRa-iH7PGw',

1693

'only_matching': True,

1694

},

1695

{

1696

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1697

'only_matching': True,

1698

},

1699

{

1700

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1701

# Also tests cut-off URL expansion in video description (see

1702

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1703

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1704

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1709

'alt_title': 'Dark Walk',

1710

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1711

'duration': 133,

1712

'upload_date': '20151119',

1713

'uploader_id': 'IronSoulElf',

1714

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1715

'uploader': 'IronSoulElf',

1716

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1717

'track': 'Dark Walk',

1718

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1719

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1720

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1721

'categories': ['Film & Animation'],

1722

'view_count': int,

1723

'live_status': 'not_live',

1724

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1725

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1726

'tags': 'count:13',

1727

'availability': 'public',

1728

'channel': 'IronSoulElf',

1729

'playable_in_embed': True,

1730

'like_count': int,

1731

'age_limit': 0,

1732

'channel_follower_count': int

1733

},

1734

'params': {

1735

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1740

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1741

'only_matching': True,

1742

},

1743

{

1744

# Video with yt:stretch=17:0

1745

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1750

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1751

'upload_date': '20151107',

1752

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1753

'uploader': 'CH GAMER DROID',

1754

},

1755

'params': {

1756

'skip_download': True,

1757

},

1758

'skip': 'This video does not exist.',

1759

},

1760

{

1761

# Video with incomplete 'yt:stretch=16:'

1762

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1763

'only_matching': True,

1764

},

1765

{

1766

# Video licensed under Creative Commons

1767

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1772

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1773

'duration': 721,

1774

'upload_date': '20150128',

1775

'uploader_id': 'BerkmanCenter',

1776

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1777

'uploader': 'The Berkman Klein Center for Internet & Society',

1778

'license': 'Creative Commons Attribution license (reuse allowed)',

1779

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1780

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1781

'like_count': int,

1782

'age_limit': 0,

1783

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1784

'channel': 'The Berkman Klein Center for Internet & Society',

1785

'availability': 'public',

1786

'view_count': int,

1787

'categories': ['Education'],

1788

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1789

'live_status': 'not_live',

1790

'playable_in_embed': True,

1791

'comment_count': int,

1792

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

# Channel-like uploader_url

1801

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1806

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1807

'duration': 4060,

1808

'upload_date': '20151120',

1809

'uploader': 'Bernie Sanders',

1810

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1811

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1812

'license': 'Creative Commons Attribution license (reuse allowed)',

1813

'playable_in_embed': True,

1814

'tags': 'count:12',

1815

'like_count': int,

1816

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1817

'age_limit': 0,

1818

'availability': 'public',

1819

'categories': ['News & Politics'],

1820

'channel': 'Bernie Sanders',

1821

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1822

'view_count': int,

1823

'live_status': 'not_live',

1824

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1825

'comment_count': int,

1826

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1835

'only_matching': True,

1836

},

1837

{

1838

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1839

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1840

'only_matching': True,

1841

},

1842

{

1843

# Rental video preview

1844

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1849

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1850

'upload_date': '20150811',

1851

'uploader': 'FlixMatrix',

1852

'uploader_id': 'FlixMatrixKaravan',

1853

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1854

'license': 'Standard YouTube License',

1855

},

1856

'params': {

1857

'skip_download': True,

1858

},

1859

'skip': 'This video is not available.',

1860

},

1861

{

1862

# YouTube Red video with episode data

1863

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1868

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1869

'duration': 2085,

1870

'upload_date': '20170118',

1871

'uploader': 'Vsauce',

1872

'uploader_id': 'Vsauce',

1873

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1874

'series': 'Mind Field',

1875

'season_number': 1,

1876

'episode_number': 1,

1877

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1878

'tags': 'count:12',

1879

'view_count': int,

1880

'availability': 'public',

1881

'age_limit': 0,

1882

'channel': 'Vsauce',

1883

'episode': 'Episode 1',

1884

'categories': ['Entertainment'],

1885

'season': 'Season 1',

1886

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1887

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1888

'like_count': int,

1889

'playable_in_embed': True,

1890

'live_status': 'not_live',

1891

'channel_follower_count': int

1892

},

1893

'params': {

1894

'skip_download': True,

1895

},

1896

'expected_warnings': [

1897

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1902

# as inappropriate or offensive to some audiences.

1903

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1908

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1909

'duration': 965,

1910

'upload_date': '20140124',

1911

'uploader': 'New Century Foundation',

1912

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1913

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1914

},

1915

'params': {

1916

'skip_download': True,

1917

},

1918

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1923

'only_matching': True,

1924

},

1925

{

1926

# geo restricted to JP

1927

'url': 'sJL6WA-aGkQ',

1928

'only_matching': True,

1929

},

1930

{

1931

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1932

'only_matching': True,

1933

},

1934

{

1935

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1936

'only_matching': True,

1937

},

1938

{

1939

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1940

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1941

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1946

'only_matching': True,

1947

},

1948

{

1949

# Video with unsupported adaptive stream type formats

1950

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1955

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1956

'duration': 433,

1957

'upload_date': '20130923',

1958

'uploader': 'Amelia Putri Harwita',

1959

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1960

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1961

'formats': 'maxcount:10',

1962

},

1963

'params': {

1964

'skip_download': True,

1965

'youtube_include_dash_manifest': False,

1966

},

1967

'skip': 'not actual anymore',

1968

},

1969

{

1970

# YouTube Music auto-generated description

1971

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1976

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1977

'upload_date': '20190312',

1978

'uploader': 'Stephen - Topic',

1979

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1980

'artist': 'Stephen',

1981

'track': 'Voyeur Girl',

1982

'album': 'it\'s too much love to know my dear',

1983

'release_date': '20190313',

1984

'release_year': 2019,

1985

'alt_title': 'Voyeur Girl',

1986

'view_count': int,

1987

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1988

'playable_in_embed': True,

1989

'like_count': int,

1990

'categories': ['Music'],

1991

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1992

'channel': 'Stephen',

1993

'availability': 'public',

1994

'creator': 'Stephen',

1995

'duration': 169,

1996

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1997

'age_limit': 0,

1998

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1999

'tags': 'count:11',

2000

'live_status': 'not_live',

2001

'channel_follower_count': int

2002

},

2003

'params': {

2004

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

2009

'only_matching': True,

2010

},

2011

{

2012

# invalid -> valid video id redirection

2013

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

2018

'description': 'md5:bf577a41da97918e94fa9798d9228825',

2019

'upload_date': '20090125',

2020

'uploader': 'Prochorowka',

2021

'uploader_id': 'Prochorowka',

2022

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

2023

'artist': 'Panjabi MC',

2024

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

2025

'album': 'Beware of the Boys (Mundian To Bach Ke)',

2026

},

2027

'params': {

2028

'skip_download': True,

2029

},

2030

'skip': 'Video unavailable',

2031

},

2032

{

2033

# empty description results in an empty string

2034

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

2041

'uploader_id': 'ElevageOrVert',

2042

'uploader': 'ElevageOrVert',

2043

'view_count': int,

2044

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

2045

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

2046

'like_count': int,

2047

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

2048

'tags': [],

2049

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

2050

'availability': 'public',

2051

'age_limit': 0,

2052

'categories': ['Pets & Animals'],

2053

'duration': 7,

2054

'playable_in_embed': True,

2055

'live_status': 'not_live',

2056

'channel': 'ElevageOrVert',

2057

'channel_follower_count': int

2058

},

2059

'params': {

2060

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

2065

# see [2] for an example with '};' inside ytInitialPlayerResponse

2066

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

2067

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

2068

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

2073

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

2074

'upload_date': '20130831',

2075

'uploader_id': 'kudvenkat',

2076

'uploader': 'kudvenkat',

2077

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

2078

'like_count': int,

2079

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

2080

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

2081

'live_status': 'not_live',

2082

'categories': ['Education'],

2083

'availability': 'public',

2084

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

2085

'tags': 'count:12',

2086

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

2091

'comment_count': int,

2092

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

2101

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

2102

'only_matching': True,

2103

},

2104

{

2105

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

2106

'only_matching': True,

2107

},

2108

{

2109

# https://github.com/ytdl-org/youtube-dl/pull/28094

2110

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

2116

'upload_date': '20141120',

2117

'uploader': 'The Cinematic Orchestra - Topic',

2118

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2119

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2120

'artist': 'The Cinematic Orchestra',

2121

'track': 'Burn Out',

2122

'album': 'Every Day',

2123

'like_count': int,

2124

'live_status': 'not_live',

2125

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2130

'creator': 'The Cinematic Orchestra',

2131

'channel': 'The Cinematic Orchestra',

2132

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

2133

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2134

'availability': 'public',

2135

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

2136

'categories': ['Music'],

2137

'playable_in_embed': True,

2138

'channel_follower_count': int

2139

},

2140

'params': {

2141

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

2146

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

2147

'only_matching': True,

2148

},

2149

{

2150

# controversial video, requires bpctr/contentCheckOk

2151

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

2156

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

2157

'uploader': 'CBS Mornings',

2158

'uploader_id': 'CBSThisMorning',

2159

'upload_date': '20140716',

2160

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

2161

'duration': 170,

2162

'categories': ['News & Politics'],

2163

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2164

'view_count': int,

2165

'channel': 'CBS Mornings',

2166

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2167

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2168

'age_limit': 18,

2169

'availability': 'needs_auth',

2170

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2171

'like_count': int,

2172

'live_status': 'not_live',

2173

'playable_in_embed': True,

2174

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2179

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2184

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2185

'upload_date': '20201120',

2186

'uploader': 'Walk around Japan',

2187

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2188

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2189

'duration': 1456,

2190

'categories': ['Travel & Events'],

2191

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2192

'view_count': int,

2193

'channel': 'Walk around Japan',

2194

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2195

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2196

'age_limit': 0,

2197

'availability': 'public',

2198

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2199

'live_status': 'not_live',

2200

'playable_in_embed': True,

2201

'channel_follower_count': int

2202

},

2203

'params': {

2204

'skip_download': True,

2205

},

2206

}, {

2207

# Has multiple audio streams

2208

'url': 'WaOKSUlf4TM',

2209

'only_matching': True

2210

}, {

2211

# Requires Premium: has format 141 when requested using YTM url

2212

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2213

'only_matching': True

2214

}, {

2215

# multiple subtitles with same lang_code

2216

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2217

'only_matching': True,

2218

}, {

2219

# Force use android client fallback

2220

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2221

'info_dict': {

2222

'id': 'YOelRv7fMxY',

2223

'title': 'DIGGING A SECRET TUNNEL Part 1',

2224

'ext': '3gp',

2225

'upload_date': '20210624',

2226

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2227

'uploader': 'colinfurze',

2228

'uploader_id': 'colinfurze',

2229

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2230

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2231

'duration': 596,

2232

'categories': ['Entertainment'],

2233

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2234

'view_count': int,

2235

'channel': 'colinfurze',

2236

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2237

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2238

'age_limit': 0,

2239

'availability': 'public',

2240

'like_count': int,

2241

'live_status': 'not_live',

2242

'playable_in_embed': True,

2243

'channel_follower_count': int,

'chapters': list,

},

'params': {

'format': '17', # 3gp format available on android

2248

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2253

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2254

'only_matching': True,

2255

'params': {

2256

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2261

'only_matching': True,

2262

}, {

2263

'note': 'Storyboards',

2264

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2270

'uploader_id': 'scishow',

2271

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2272

'upload_date': '20140324',

2273

'uploader': 'SciShow',

2274

'like_count': int,

2275

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2276

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2277

'view_count': int,

2278

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2279

'playable_in_embed': True,

2280

'tags': 'count:12',

2281

'uploader_url': 'http://www.youtube.com/user/scishow',

2282

'availability': 'public',

2283

'channel': 'SciShow',

2284

'live_status': 'not_live',

2285

'duration': 248,

2286

'categories': ['Education'],

2287

'age_limit': 0,

2288

'channel_follower_count': int,

2289

'chapters': list,

2290

}, 'params': {'format': 'mhtml', 'skip_download': True}

2291

}, {

2292

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2293

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2298

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2299

'uploader': 'Leon Nguyen',

2300

'uploader_id': 'VNSXIII',

2301

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2302

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2303

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2308

'tags': 'count:23',

2309

'playable_in_embed': True,

2310

'live_status': 'not_live',

2311

'upload_date': '20220103',

2312

'like_count': int,

2313

'availability': 'public',

2314

'channel': 'Leon Nguyen',

2315

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2316

'comment_count': int,

2317

'channel_follower_count': int

2318

}

2319

}, {

2320

# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date

2321

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2326

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2327

'uploader': 'Leon Nguyen',

2328

'uploader_id': 'VNSXIII',

2329

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2330

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2331

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2336

'tags': 'count:23',

2337

'playable_in_embed': True,

2338

'live_status': 'not_live',

2339

'upload_date': '20220102',

2340

'like_count': int,

2341

'availability': 'public',

2342

'channel': 'Leon Nguyen',

2343

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2344

'comment_count': int,

2345

'channel_follower_count': int

2346

},

2347

'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}

2348

}, {

2349

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2350

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2355

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2356

'uploader': 'Quackity',

2357

'uploader_id': 'QuackityHQ',

2358

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2359

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2360

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2365

'tags': 'count:26',

2366

'playable_in_embed': True,

2367

'live_status': 'not_live',

2368

'release_timestamp': 1641172509,

2369

'release_date': '20220103',

2370

'upload_date': '20220103',

2371

'like_count': int,

2372

'availability': 'public',

2373

'channel': 'Quackity',

2374

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2375

'channel_follower_count': int

2376

}

2377

},

2378

{ # continuous livestream. Microformat upload date should be preferred.

2379

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2380

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2381

'info_dict': {

2382

'id': 'kgx4WGK0oNU',

2383

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2384

'ext': 'mp4',

2385

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2386

'availability': 'public',

2387

'age_limit': 0,

2388

'release_timestamp': 1637975704,

2389

'upload_date': '20210619',

2390

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2391

'live_status': 'is_live',

2392

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2393

'uploader': '阿鲍Abao',

2394

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2395

'channel': 'Abao in Tokyo',

2396

'channel_follower_count': int,

2397

'release_date': '20211127',

2398

'tags': 'count:39',

2399

'categories': ['People & Blogs'],

2400

'like_count': int,

2401

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2402

'view_count': int,

2403

'playable_in_embed': True,

2404

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2405

'concurrent_view_count': int,

2406

},

2407

'params': {'skip_download': True}

2408

}, {

2409

# Story. Requires specific player params to work.

2410

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2415

'view_count': int,

2416

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2417

'upload_date': '20220526',

2418

'categories': ['Education'],

2419

'title': 'Story',

2420

'channel': 'IT\'S HISTORY',

2421

'description': '',

2422

'uploader_id': 'BlastfromthePast',

2423

'duration': 12,

2424

'uploader': 'IT\'S HISTORY',

2425

'playable_in_embed': True,

2426

'age_limit': 0,

2427

'live_status': 'not_live',

2428

'tags': [],

2429

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2430

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2431

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2432

},

2433

'skip': 'stories get removed after some period of time',

2434

}, {

2435

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2440

'upload_date': '20220323',

2441

'like_count': int,

2442

'availability': 'unlisted',

2443

'channel': 'nao20010128nao',

2444

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2445

'age_limit': 0,

2446

'uploader': 'nao20010128nao',

2447

'uploader_id': 'nao20010128nao',

2448

'categories': ['Music'],

2449

'view_count': int,

2450

'description': '',

2451

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2452

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2453

'live_status': 'not_live',

2454

'playable_in_embed': True,

2455

'channel_follower_count': int,

2456

'duration': 6,

2457

'tags': [],

2458

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

2459

}

2460

}, {

2461

# Prefer primary title+description language metadata by default

2462

# Do not prefer translated description if primary is empty

2463

'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',

'info_dict': {

'id': 'el3E4MbxRqQ',

'ext': 'mp4',

'title': 'dlp test video 2 - primary sv no desc',

2468

'description': '',

2469

'channel': 'cole-dlp-test-acc',

2470

'tags': [],

2471

'view_count': int,

2472

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2473

'like_count': int,

2474

'playable_in_embed': True,

2475

'availability': 'unlisted',

2476

'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',

2477

'age_limit': 0,

2478

'duration': 5,

2479

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2480

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2481

'live_status': 'not_live',

2482

'upload_date': '20220908',

2483

'categories': ['People & Blogs'],

2484

'uploader': 'cole-dlp-test-acc',

2485

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2486

},

2487

'params': {'skip_download': True}

2488

}, {

2489

# Extractor argument: prefer translated title+description

2490

'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',

'info_dict': {

'id': 'gHKT4uU8Zng',

'ext': 'mp4',

'channel': 'cole-dlp-test-acc',

2495

'tags': [],

2496

'duration': 5,

2497

'live_status': 'not_live',

2498

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2499

'upload_date': '20220728',

2500

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2501

'view_count': int,

2502

'categories': ['People & Blogs'],

2503

'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',

2504

'title': 'dlp test video title translated (fr)',

2505

'availability': 'public',

2506

'uploader': 'cole-dlp-test-acc',

2507

'age_limit': 0,

2508

'description': 'dlp test video description translated (fr)',

2509

'playable_in_embed': True,

2510

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2511

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2512

},

2513

'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},

2514

'expected_warnings': [r'Preferring "fr" translated fields'],

2515

}, {

2516

'note': '6 channel audio',

2517

'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',

2518

'only_matching': True,

2519

}, {

2520

'note': 'Multiple HLS formats with same itag',

2521

'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',

'info_dict': {

'id': 'kX3nB4PpJko',

'ext': 'mp4',

'categories': ['Entertainment'],

2526

'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',

2527

'uploader_url': 'http://www.youtube.com/user/MrBeast6000',

2528

'live_status': 'not_live',

2529

'duration': 937,

2530

'channel_follower_count': int,

2531

'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',

2532

'title': 'Last To Take Hand Off Jet, Keeps It!',

2533

'channel': 'MrBeast',

2534

'playable_in_embed': True,

2535

'view_count': int,

2536

'upload_date': '20221112',

2537

'uploader': 'MrBeast',

2538

'uploader_id': 'MrBeast6000',

2539

'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',

2540

'age_limit': 0,

2541

'availability': 'public',

2542

'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',

'like_count': int,

'tags': [],

},

'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},

2547

}, {

2548

'note': 'Audio formats with Dynamic Range Compression',

2549

'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',

'info_dict': {

'id': 'Tq92D6wQ1mg',

'ext': 'weba',

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

2554

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

2555

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

2556

'channel_follower_count': int,

2557

'description': 'md5:17eccca93a786d51bc67646756894066',

2558

'upload_date': '20191228',

2559

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

2560

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

2561

'playable_in_embed': True,

2562

'like_count': int,

2563

'categories': ['Entertainment'],

2564

'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',

2565

'age_limit': 18,

2566

'channel': 'Projekt Melody',

2567

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

2568

'view_count': int,

2569

'availability': 'needs_auth',

2570

'comment_count': int,

2571

'live_status': 'not_live',

2572

'uploader': 'Projekt Melody',

2573

'duration': 106,

2574

},

2575

'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},

2576

},

2577

{

2578

'url': 'https://www.youtube.com/live/qVv6vCqciTM',

'info_dict': {

'id': 'qVv6vCqciTM',

'ext': 'mp4',

'age_limit': 0,

'uploader_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',

2584

'comment_count': int,

2585

'chapters': 'count:13',

2586

'upload_date': '20221223',

2587

'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',

2588

'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',

2589

'uploader_url': 'http://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',

2590

'like_count': int,

2591

'release_date': '20221223',

2592

'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],

2593

'title': '【 #インターネット女クリスマス】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',

2594

'view_count': int,

2595

'playable_in_embed': True,

2596

'duration': 4438,

2597

'availability': 'public',

2598

'channel_follower_count': int,

2599

'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',

2600

'categories': ['Entertainment'],

2601

'live_status': 'was_live',

2602

'release_timestamp': 1671793345,

2603

'channel': 'さなちゃんねる',

2604

'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',

2605

'uploader': 'さなちゃんねる',

},

},

]

_WEBPAGE_TESTS = [

# YouTube <object> embed

2612

{

2613

'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',

2614

'md5': '873c81d308b979f0e23ee7e620b312a3',

'info_dict': {

'id': 'msN87y-iEx0',

'ext': 'mp4',

'title': 'Feynman: Mirrors FUN TO IMAGINE 6',

2619

'upload_date': '20080526',

2620

'description': 'md5:873c81d308b979f0e23ee7e620b312a3',

2621

'uploader': 'Christopher Sykes',

2622

'uploader_id': 'ChristopherJSykes',

2623

'age_limit': 0,

2624

'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],

2625

'channel_id': 'UCCeo--lls1vna5YJABWAcVA',

2626

'playable_in_embed': True,

2627

'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',

2628

'like_count': int,

2629

'comment_count': int,

2630

'channel': 'Christopher Sykes',

2631

'live_status': 'not_live',

2632

'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',

2633

'availability': 'public',

2634

'duration': 195,

2635

'view_count': int,

2636

'categories': ['Science & Technology'],

2637

'channel_follower_count': int,

2638

'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',

2639

},

2640

'params': {

2641

'skip_download': True,

}

},

]

@classmethod
def suitable(cls, url):
    """Return False for URLs with a non-empty ``list`` query value; otherwise defer to the parent class."""
    # Function-scope import is kept exactly as the original block has it.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)

2654

2655

def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent initialiser and create two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> player JS source (filled by _load_player)
    # _player_cache: cache-id tuple -> computed value or stored exception (filled by _cached)
    self._code_cache, self._player_cache = {}, {}

2659

2660

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """
    Attach a fragment source to every format flagged ``is_from_start``.

    While the stream is live, ``fragments`` is a callable (a partial of
    ``_live_dash_fragments``) and the protocol is switched to
    ``http_dash_segments_generator``; otherwise ``fragments`` is a LazyList
    built from a single generator run. The format dicts are mutated in place
    and their ``is_from_start`` key is removed.
    """
    manifest_lock = threading.Lock()
    last_refresh = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download player responses, but only once `delay` seconds have
        # elapsed since the previous refresh
        nonlocal formats, last_refresh, is_live
        if time.time() <= last_refresh + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        last_refresh = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            with manifest_lock:
                refetch_manifest(format_id, delay)

            refreshed = next((fmt for fmt in formats if fmt['format_id'] == format_id), None)
            if refreshed:
                return refreshed['manifest_url'], refreshed['manifest_stream_number'], is_live

            # Record why this attempt failed and let the retry manager decide
            if is_live:
                retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
            else:
                retry.error = f'{video_id}: Video is no longer live'
        return None

    for fmt in formats:
        fmt['is_live'] = is_live
        # Post-live downloads hand the generator a snapshot of the original format
        orig_fmt = False if is_live else fmt.copy()
        fragment_source = functools.partial(
            self._live_dash_fragments, video_id, fmt['format_id'],
            live_start_time, mpd_feed, orig_fmt)
        if is_live:
            fmt['fragments'] = fragment_source
            fmt['protocol'] = 'http_dash_segments_generator'
        else:
            fmt['fragments'] = LazyList(fragment_source({}))
        del fmt['is_from_start']

2708

2709

def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """
    Generate fragment dicts (``url``, ``fragment_count``) for a live-from-start download.

    @param mpd_feed               callable ``(format_id, delay)`` returning
                                  ``(manifest_url, stream_number, is_live)`` or None
    @param manifestless_orig_fmt  falsy, or a format dict that supplies ``fragments`` and
                                  ``fragment_base_url`` directly; the loop then stops after one pass
    @param ctx                    dict; ``start`` and ``last_error`` are read from it

    The loop polls at most once every FETCH_SPAN seconds and gives up once
    ``no_fragment_score`` exceeds 30.
    """
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    # MAX_DURATION is 432000 seconds, i.e. the 120 hours mentioned in the warning
    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Bound up front: the helper below reads it as a free variable and may be
    # called before the polling loop has assigned it (previously a NameError)
    last_seq = 0

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            # A default is required here: a bare next() would raise StopIteration,
            # which surfaces as RuntimeError inside the enclosing generator
            fmt_info = next(
                (x for x in fmts or [] if x['manifest_stream_number'] == stream_number), None)
            if not fmt_info:
                no_fragment_score += 2
                return False, last_seq
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Used purely as control flow to restart the polling loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2819

2820

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in any of the given ytcfg dicts, or None."""
    candidate_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    found = traverse_obj(ytcfgs, *candidate_paths, get_all=False, expected_type=str)
    if found:
        return urljoin('https://www.youtube.com', found)
    return None

2827

2828

def _download_player_url(self, video_id, fatal=False):

2829

res = self._download_webpage(

2830

'https://www.youtube.com/iframe_api',

2831

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2832

if res:

2833

player_version = self._search_regex(

2834

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2835

if player_version:

2836

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2837

2838

def _signature_cache_id(self, example_sig):

2839

""" Return a string representation of a signature """

2840

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2841

2842

@classmethod

2843

def _extract_player_info(cls, player_url):

2844

for player_re in cls._PLAYER_INFO_RE:

2845

id_m = re.search(player_re, player_url)

if id_m:

break

else:

raise ExtractorError('Cannot identify player %r' % player_url)

2850

return id_m.group('id')

2851

2852

def _load_player(self, video_id, player_url, fatal=True):

2853

player_id = self._extract_player_info(player_url)

2854

if player_id not in self._code_cache:

2855

code = self._download_webpage(

2856

player_url, video_id, fatal=fatal,

2857

note='Downloading player ' + player_id,

2858

errnote='Download of %s failed' % player_url)

2859

if code:

2860

self._code_cache[player_id] = code

2861

return self._code_cache.get(player_id)

2862

2863

def _extract_signature_function(self, video_id, player_url, example_sig):

2864

player_id = self._extract_player_info(player_url)

2865

2866

# Read from filesystem cache

2867

func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'

2868

assert os.path.basename(func_id) == func_id

2869

2870

self.write_debug(f'Extracting signature function {func_id}')

2871

cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None

2872

2873

if not cache_spec:

2874

code = self._load_player(video_id, player_url)

2875

if code:

2876

res = self._parse_sig_js(code)

2877

test_string = ''.join(map(chr, range(len(example_sig))))

2878

cache_spec = [ord(c) for c in res(test_string)]

2879

self.cache.store('youtube-sigfuncs', func_id, cache_spec)

2880

2881

return lambda s: ''.join(s[i] for i in cache_spec)

2882

2883

def _print_sig_code(self, func, example_sig):

2884

if not self.get_param('youtube_print_sig_code'):

2885

return

2886

2887

def gen_sig_code(idxs):

2888

def _genslice(start, end, step):

2889

starts = '' if start == 0 else str(start)

2890

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2891

steps = '' if step == 1 else (':%d' % step)

2892

return f's[{starts}{ends}{steps}]'

2893

2894

step = None

2895

# Quelch pyflakes warnings - start will be set when step is set

2896

start = '(Never used)'

2897

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2902

step = None

2903

continue

2904

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2914

2915

test_string = ''.join(map(chr, range(len(example_sig))))

2916

cache_res = func(test_string)

2917

cache_spec = [ord(c) for c in cache_res]

2918

expr_code = ' + '.join(gen_sig_code(cache_spec))

2919

signature_id_tuple = '(%s)' % (

2920

', '.join(str(len(p)) for p in example_sig.split('.')))

2921

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2922

' return %s\n') % (signature_id_tuple, expr_code)

2923

self.to_screen('Extracted signature function:\n' + code)

2924

2925

def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return a one-argument Python wrapper for it.

    The patterns are tried in order; each captures the function name in the
    ``sig`` group. Literal parentheses of the JS source are written ``\\(`` and
    ``\\)``; a bare ``$`` outside a character class would anchor at end of
    input and make the pattern unmatchable.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])

2948

2949

def _cached(self, func, *cache_id):

2950

def inner(*args, **kwargs):

2951

if cache_id not in self._player_cache:

2952

try:

2953

self._player_cache[cache_id] = func(*args, **kwargs)

2954

except ExtractorError as e:

2955

self._player_cache[cache_id] = e

2956

except Exception as e:

2957

self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)

2958

2959

ret = self._player_cache[cache_id]

2960

if isinstance(ret, Exception):

raise ret

return ret

return inner

def _decrypt_signature(self, s, video_id, player_url):

2966

"""Turn the encrypted s field into a working signature"""

2967

extract_sig = self._cached(

2968

self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))

2969

func = extract_sig(video_id, player_url, s)

2970

self._print_sig_code(func, s)

2971

return func(s)

2972

2973

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as err:
        raise ExtractorError('Unable to extract nsig function code', cause=err)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        nsig_factory = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        decrypted = nsig_factory(jsi, func_code)(s)
    except JSInterpreter.Exception as native_err:
        # Fall back to PhantomJS; when it is unavailable, surface the native error
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise native_err
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_err, only_once=True)

        arg_names, func_body = func_code
        script = f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));'
        decrypted = jsi.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted

3006

3007

def _extract_n_function_name(self, jscode):

3008

funcname, idx = self._search_regex(

3009

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

3010

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

if not idx:

return funcname

return json.loads(js_to_json(self._search_regex(

3015

rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,

3016

f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]

3017

3018

def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's "n" transform function.

    func_code comes from the 'youtube-nsig' cache when present. Otherwise it is
    extracted from the player JS, first by regex and then via the interpreter,
    and stored back into the cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # func_name is escaped because JS identifiers may contain "$", which is a regex anchor
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
                 # NB: The end of the regex is intentionally kept strict
                 {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Normalise to the (argument names, body) shape
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code

3043

3044

def _extract_n_function_from_code(self, jsi, func_code):

3045

func = jsi.extract_function_from_code(*func_code)

def extract_nsig(s):

try:

ret = func([s])

except JSInterpreter.Exception:

3051

raise

3052

except Exception as e:

3053

raise JSInterpreter.Exception(traceback.format_exc(), cause=e)

3054

3055

if ret.startswith('enhanced_except_'):

3056

raise JSInterpreter.Exception('Signature function returned an exception')

return ret

return extract_nsig

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    player_js = self._load_player(video_id, player_url, fatal=fatal)
    if not player_js:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_js,
        'JS player signature timestamp', group='sts', fatal=fatal))

3084

3085

def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs; stops at the first one that is missing."""
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # more consistent results setting it to right before the end
        reported_len = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_len) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = 0
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

3125

3126

@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url_result entries for YouTube videos referenced by a third-party webpage."""
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate:
        yield cls.url_result(alternate.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for raw_id in lazy_ids:
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_matches:
        # The last captured group holds the video id
        embedded_id = groups[-1]
        yield cls.url_result(embedded_id, cls, embedded_id)

3149

3150

@classmethod
def extract_id(cls, url):
    """Return the id that ``get_temp_id`` finds in url; raise ExtractorError when there is none."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

3156

3157

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in data."""
    player_bar_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, player_bar_path, expected_type=list)

    def start_seconds(chapter):
        # Source value is in milliseconds
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)

3171

3172

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the first non-empty chapter list found among the engagement panels, else []."""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

3184

3185

def _extract_chapters_from_description(self, description, duration):

3186

duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'

3187

sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'

3188

return self._extract_chapters(

3189

re.findall(sep_re % (duration_re, r'.+?'), description or ''),

3190

chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],

3191

duration=duration, strict=False) or self._extract_chapters(

3192

re.findall(sep_re % (r'.+?', duration_re), description or ''),

3193

chapter_time=lambda x: parse_duration(x[1]), chapter_title=lambda x: x[0],

3194

duration=duration, strict=False)

3195

3196

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

3201

'title': chapter_title(chapter),

3202

} for chapter in chapter_list or []]

3203

if not strict:

3204

chapter_list.sort(key=lambda c: c['start_time'] or 0)

3205

3206

chapters = [{'start_time': 0}]

3207

for idx, chapter in enumerate(chapter_list):

3208

if chapter['start_time'] is None:

3209

self.report_warning(f'Incomplete chapter {idx}')

3210

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

3211

chapters.append(chapter)

3212

elif chapter not in chapters:

3213

self.report_warning(

3214

f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')

3215

return chapters[1:]

3216

3217

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer into a flat comment dict; returns None when it has no commentId."""
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    body = self._get_text(comment_renderer, 'contentText')

    # Timestamp is an estimate calculated from the current time and time_text
    published_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    estimated_timestamp = self._parse_time_text(published_text)

    author_name = self._get_text(comment_renderer, 'authorText')
    channel_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    vote_getters = (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount'])
    like_count = parse_count(try_get(comment_renderer, vote_getters, str)) or 0
    avatar_url = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)

    by_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    hearted = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': body,
        'timestamp': estimated_timestamp,
        'time_text': published_text,
        'like_count': like_count,
        'is_favorited': hearted,
        'author': author_name,
        'author_id': channel_id,
        'author_thumbnail': avatar_url,
        'author_is_uploader': by_uploader,
        'parent': parent or 'root'
    }

3253

3254

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generator over comment dicts, following continuations page by page.

    Called with ``parent=None`` for the top-level section and recursively
    (with the parent comment id and the shared ``tracker`` dict) for reply threads.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        # Read the expected comment count and pick the continuation for the requested sort order
        _continuation = None
        for content in contents:
            header = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                header, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                header,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consumer below to stop
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if replies_renderer:
                tracker['current_page_thread'] += 1
                reply_iter = self._comment_entries(
                    replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                yield from itertools.islice(reply_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Missing or unparsable limits fall back to sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=None if is_forced_continuation else 'onResponseReceivedEndpoints')
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if not ('incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True):
                raise
            self.report_warning(
                'Received incomplete data for a comment reply thread and retrying did not help. '
                'Ignoring to let other comments be downloaded.')
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled

3410

3411

@staticmethod

3412

def _generate_comment_continuation(video_id):

3413

"""

3414

Generates initial comment section continuation token from given video id

3415

"""

3416

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

3417

return base64.b64encode(token.encode()).decode()

3418

3419

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the entries produced by `_comment_entries`
    for the 'comment-item-section' renderer found in `contents`, sliced with
    the first value of the `max_comments` extractor argument (passed through
    int_or_none) as the upper bound. `webpage` is not used here.
    """
    def _iter_comments(page_contents):
        section = None
        for candidate in traverse_obj(page_contents, (..., 'itemSectionRenderer'), default={}):
            if candidate.get('sectionIdentifier') == 'comment-item-section':
                section = candidate
                break
        yield from self._comment_entries(section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_iter_comments(contents), 0, limit)

3429

3430

@staticmethod

3431

def _get_checkok_params():
    """Return a fresh dict with the `contentCheckOk` and `racyCheckOk` flags set to True."""
    return dict(contentCheckOk=True, racyCheckOk=True)

3433

3434

@classmethod

3435

def _generate_player_context(cls, sts=None):
    """Build the playback context part of a player API query.

    @param sts  Signature timestamp; only included when it is not None
    @returns    Dict holding `playbackContext` plus the entries returned by
                `cls._get_checkok_params()`
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback.update(signatureTimestamp=sts)

    payload = {'playbackContext': {'contentPlaybackContext': playback}}
    payload.update(cls._get_checkok_params())
    return payload

@staticmethod

def _is_agegated(player_response):
    """Tell whether the playability status of a player response points to an age gate.

    True when `desktopLegacyAgeGateReason` is set, or when the `status` or
    `reason` value contains one of the known marker substrings.
    """
    # Substrings expected in the `reason` and in the `status` respectively
    reason_markers = ('confirm your age', 'age-restricted', 'inappropriate')
    status_markers = ('age_verification_required', 'age_check_required')

    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    candidates = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    # NOTE(review): the substring test is case-sensitive and the status markers
    # are lower case - confirm that the status values can actually match them
    for marker in reason_markers + status_markers:
        for candidate in candidates:
            if marker in candidate:
                return True
    return False

3459

3460

@staticmethod

3461

def _is_unplayable(player_response):
    """Tell whether the playability status of the player response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3463

3464

# Value sent as `params` in the player API query (for stories and for the android client)
# and as the `pp` query parameter when downloading the webpage of a story
_STORY_PLAYER_PARAMS = '8AEB'

3465

3466

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the player API response of `video_id` using the given client.

    The signature timestamp is only looked up when `player_url` is given.
    `params` is added to the query for stories and for the android client.

    @returns  The response of the `player` endpoint, or None if it is falsy
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    api_headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    query = {'videoId': video_id}
    if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
        query['params'] = self._STORY_PLAYER_PARAMS
    query.update(self._generate_player_context(sts))

    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None

3487

3488

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the `player_client` extractor argument into an ordered list of clients.

    Each value may be the name of a client from INNERTUBE_CLIENTS that does
    not start with `_`, `default` (android and web) or `all` (every allowed
    client, sorted by descending priority). Anything else is skipped with a
    warning. When nothing usable was requested, the default clients are used.
    For music URLs the `*_music` variant of each requested client is added
    as well, if such a client exists.

    @returns  The requested clients passed through orderedSet
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that later additions do not modify the defaults
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending the list with a generator that
        # iterates the very same list would also visit the newly added names
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3511

3512

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect the player responses of all requested clients.

    Clients are tried in the given order. Depending on the responses, further
    clients (creator/embedded variants) may be queued on the fly. An error of
    one client is only reported as a warning as long as any response could be
    obtained; otherwise the last error is raised.

    @returns  (list of player responses, player URL or None)
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    seen_clients = set(clients)
    pending = clients[::-1]  # Reversed so that pop() yields the clients in the given order
    responses = []

    def append_client(*client_names):
        """ Queue the first client name that exists and was not used already """
        for name in client_names:
            actual_client = _split_innertube_client(name)[0]
            if actual_client not in INNERTUBE_CLIENTS or actual_client in seen_clients:
                continue
            pending.append(name)
            seen_clients.add(actual_client)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        responses.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client, base_client, variant = _split_innertube_client(pending.pop())
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; older ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                responses.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not responses:
            raise last_error
        self.report_warning(last_error)
    return responses, player_url

3593

3594

def _needs_live_processing(self, live_status, duration):
    """Return `live_status` if the video has to be processed as a live stream, else None.

    This is the case for 'is_live' when the `live_from_start` param is set,
    and for 'post_live' when the duration exceeds 4 hours.
    """
    if live_status == 'is_live':
        wanted = self.get_param('live_from_start')
    elif live_status == 'post_live':
        wanted = (duration or 0) > 4 * 3600
    else:
        wanted = False
    return live_status if wanted else None

3598

3599

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generate the formats found in the streaming data, followed by the subtitles.

    The formats listed directly in `formats`/`adaptiveFormats` are yielded
    first, then those of the HLS and DASH manifests unless these are skipped.
    The very last item yielded is the dict of subtitles merged from the
    manifests, so callers must split it off from the formats.

    @param streaming_data  List of `streamingData` dicts of the player responses
    @param player_url      URL of the JS player, used for deciphering the
                           signature and the `n` parameter of the format URLs
    @param live_status     Live status of the video
    @param duration        Duration of the video in seconds, if known
    """
    itags, stream_ids = collections.defaultdict(set), []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # The URL and the encrypted signature are packed into `signatureCipher`
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                throttled = True

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality is None when the format has neither `quality` nor `audioQuality`
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                'DRC' if fmt.get('isDrc') else None,
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            # The track id may be present but null
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else '') or None,
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'

        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, itag):
        """Set the format id and quality of a manifest format; False if it is a duplicate"""
        key = (proto, f.get('language'))
        if key in itags[itag]:
            return False
        itags[itag].add(key)

        # Add the protocol to the id if the itag is available from another protocol too
        if any(p != proto for p, _ in itags[itag]):
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality of the format with the closest height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    yield subtitles

def _extract_storyboard(self, player_responses, duration):
    """Generate one mhtml format per storyboard level of the storyboard spec.

    The spec is a `|` separated string: the first part is the base URL and
    each following part describes one level as 8 `#` separated fields.
    Malformed levels are skipped with a warning. Nothing is generated when
    the base URL is missing or invalid.
    """
    raw_spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    levels = raw_spec.split('|')[::-1]
    # After reversing, the base URL is the last element
    base_url = url_or_none(urljoin('https://i.ytimg.com/', levels.pop() or None))
    if not base_url:
        return

    top_level = len(levels) - 1
    for index, level in enumerate(levels):
        fields = level.split('#')
        numbers = [int_or_none(field) for field in fields[:5]]
        if len(fields) != 8 or not all(numbers):
            self.report_warning(f'Malformed storyboard {index}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = numbers
        name, sigh = fields[6:]

        url = base_url.replace('$L', str(top_level - index)).replace('$N', name) + f'&sigh={sigh}'
        # Each fragment is a grid of cols x rows frames
        fragment_count = frame_count / (cols * rows)
        # NOTE(review): a duration of None or 0 raises here or in the fps
        # computation below - confirm that callers always pass a positive duration
        fragment_duration = duration / fragment_count
        fps = frame_count / duration
        fragments = [{
            'url': url.replace('$M', str(j)),
            'duration': min(fragment_duration, duration - (j * fragment_duration)),
        } for j in range(math.ceil(fragment_count))]
        yield {
            'format_id': f'sb{index}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': fps,
            'rows': rows,
            'columns': cols,
            'fragments': fragments,
        }

3843

3844

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the webpage (unless skipped) and the player responses of a video.

    The webpage is not downloaded when `webpage` is given in the `player_skip`
    extractor argument. Failing to download it is not fatal.

    @returns  (webpage or None, master ytcfg, player responses, player URL)
    """
    webpage = None
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    if not skip_webpage:
        page_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            page_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(webpage_url, video_id, fatal=False, query=page_query)

    # Fall back to the default config when none could be extracted
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url

3860

3861

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status of the video and extract its formats.

    @returns  (live broadcast details, live status, streaming data, formats, subtitles)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The first matching condition decides the status
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator yields all the formats and the subtitles as its last item
    extracted = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)
    *formats, subtitles = extracted

    return live_broadcast_details, live_status, streaming_data, formats, subtitles

3879

3880

def _real_extract(self, url):

3881

url, smuggled_data = unsmuggle_url(url, {})

3882

video_id = self._match_id(url)

3883

3884

base_url = self.http_scheme() + '//www.youtube.com/'

3885

webpage_url = base_url + 'watch?v=' + video_id

3886

3887

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3888

3889

playability_statuses = traverse_obj(

3890

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3891

3892

trailer_video_id = get_first(

3893

playability_statuses,

3894

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3895

expected_type=str)

3896

if trailer_video_id:

3897

return self.url_result(

3898

trailer_video_id, self.ie_key(), trailer_video_id)

3899

3900

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3901

if webpage else (lambda x: None))

3902

3903

video_details = traverse_obj(

3904

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3905

microformats = traverse_obj(

3906

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3907

expected_type=dict, default=[])

3908

3909

translated_title = self._get_text(microformats, (..., 'title'))

3910

video_title = (self._preferred_lang and translated_title

3911

or get_first(video_details, 'title') # primary

3912

or translated_title

3913

or search_meta(['og:title', 'twitter:title', 'title']))

3914

translated_description = self._get_text(microformats, (..., 'description'))

3915

original_description = get_first(video_details, 'shortDescription')

3916

video_description = (

3917

self._preferred_lang and translated_description

3918

# If original description is blank, it will be an empty string.

3919

# Do not prefer translated description in this case.

3920

or original_description if original_description is not None else translated_description)

3921

3922

multifeed_metadata_list = get_first(

3923

player_responses,

3924

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3925

expected_type=str)

3926

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3927

if self.get_param('noplaylist'):

3928

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3933

# Unquote should take place before split on comma (,) since textual

3934

# fields may contain comma as well (see

3935

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3936

feed_data = urllib.parse.parse_qs(

3937

urllib.parse.unquote_plus(feed))

3938

3939

def feed_entry(name):

3940

return try_get(

3941

feed_data, lambda x: x[name][0], str)

3942

3943

feed_id = feed_entry('id')

3944

if not feed_id:

3945

continue

3946

feed_title = feed_entry('title')

3947

title = video_title

3948

if feed_title:

3949

title += ' (%s)' % feed_title

3950

entries.append({

3951

'_type': 'url_transparent',

3952

'ie_key': 'Youtube',

3953

'url': smuggle_url(

3954

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3955

{'force_singlefeed': True}),

3956

'title': title,

3957

})

3958

feed_ids.append(feed_id)

3959

self.to_screen(

3960

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3961

% (', '.join(feed_ids), video_id))

3962

return self.playlist_result(

3963

entries, video_id, video_title, video_description)

3964

3965

duration = (int_or_none(get_first(video_details, 'lengthSeconds'))

3966

or int_or_none(get_first(microformats, 'lengthSeconds'))

3967

or parse_duration(search_meta('duration')) or None)

3968

3969

live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \

3970

self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

3971

if live_status == 'post_live':

3972

self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

3973

3974

if not formats:

3975

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3976

self.report_drm(video_id)

3977

pemr = get_first(

3978

playability_statuses,

3979

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3980

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3981

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3982

if subreason:

3983

if subreason == 'The uploader has not made this video available in your country.':

3984

countries = get_first(microformats, 'availableCountries')

3985

if not countries:

3986

regions_allowed = search_meta('regionsAllowed')

3987

countries = regions_allowed.split(',') if regions_allowed else None

3988

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3989

reason += f'. {subreason}'

3990

if reason:

3991

self.raise_no_formats(reason, expected=True)

3992

3993

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3994

if not keywords and webpage:

3995

keywords = [

3996

unescapeHTML(m.group('content'))

3997

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3998

for keyword in keywords:

3999

if keyword.startswith('yt:stretch='):

4000

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

4001

if mobj:

4002

# NB: float is intentional for forcing float division

4003

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

4008

f['stretched_ratio'] = ratio

4009

break

4010

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

4011

thumbnail_url = search_meta(['og:image', 'twitter:image'])

4012

if thumbnail_url:

4013

thumbnails.append({

4014

'url': thumbnail_url,

4015

})

4016

original_thumbnails = thumbnails.copy()

4017

4018

# The best resolution thumbnails sometimes does not appear in the webpage

4019

# See: https://github.com/yt-dlp/yt-dlp/issues/340

4020

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

4021

thumbnail_names = [

4022

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

4023

# in resolution, these are not the custom thumbnail. So de-prioritize them

4024

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

4025

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

4026

]

4027

n_thumbnail_names = len(thumbnail_names)

4028

thumbnails.extend({

4029

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

4030

video_id=video_id, name=name, ext=ext,

4031

webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),

4032

} for name in thumbnail_names for ext in ('webp', 'jpg'))

4033

for thumb in thumbnails:

4034

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

4035

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

4036

self._remove_duplicate_formats(thumbnails)

4037

self._downloader._sort_thumbnails(original_thumbnails)

4038

4039

category = get_first(microformats, 'category') or search_meta('genre')

4040

channel_id = str_or_none(

4041

get_first(video_details, 'channelId')

4042

or get_first(microformats, 'externalChannelId')

4043

or search_meta('channelId'))

4044

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

4045

4046

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

4047

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

4048

if not duration and live_end_time and live_start_time:

4049

duration = live_end_time - live_start_time

4050

4051

needs_live_processing = self._needs_live_processing(live_status, duration)

4052

4053

def is_bad_format(fmt):

4054

if needs_live_processing and not fmt.get('is_from_start'):

4055

return True

4056

elif (live_status == 'is_live' and needs_live_processing != 'is_live'

4057

and fmt.get('protocol') == 'http_dash_segments'):

4058

return True

4059

4060

for fmt in filter(is_bad_format, formats):

4061

fmt['preference'] = (fmt.get('preference') or -1) - 10

4062

fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')

4063

4064

if needs_live_processing:

4065

self._prepare_live_from_start_formats(

4066

formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

4067

4068

formats.extend(self._extract_storyboard(player_responses, duration))

info = {

'id': video_id,

'title': video_title,

4073

'formats': formats,

4074

'thumbnails': thumbnails,

4075

# The best thumbnail that we are sure exists. Prevents unnecessary

4076

# URL checking if user don't care about getting the best possible thumbnail

4077

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

4078

'description': video_description,

4079

'uploader': get_first(video_details, 'author'),

4080

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

4081

'uploader_url': owner_profile_url,

4082

'channel_id': channel_id,

4083

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

4084

'duration': duration,

4085

'view_count': int_or_none(

4086

get_first((video_details, microformats), (..., 'viewCount'))

4087

or search_meta('interactionCount')),

4088

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

4089

'age_limit': 18 if (

4090

get_first(microformats, 'isFamilySafe') is False

4091

or search_meta('isFamilyFriendly') == 'false'

4092

or search_meta('og:restrictions:age') == '18+') else 0,

4093

'webpage_url': webpage_url,

4094

'categories': [category] if category else None,

4095

'tags': keywords,

4096

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

4097

'live_status': live_status,

4098

'release_timestamp': live_start_time,

4099

'_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats

4100

'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')

}

subtitles = {}

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

4105

if pctr:

4106

def get_lang_code(track):

4107

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

4108

or track.get('languageCode'))

4109

4110

# Converted into dicts to remove duplicates

4111

captions = {

4112

get_lang_code(sub): sub

4113

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

4114

translation_languages = {

4115

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

4116

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

4117

4118

def process_language(container, base_url, lang_code, sub_name, query):

4119

lang_subs = container.setdefault(lang_code, [])

4120

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

# NB: Constructing the full subtitle dictionary is slow

4131

get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (

4132

self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

4133

for lang_code, caption_track in captions.items():

4134

base_url = caption_track.get('baseUrl')

4135

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

4136

if not base_url:

4137

continue

4138

lang_name = self._get_text(caption_track, 'name', max_runs=1)

4139

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

4144

if not caption_track.get('isTranslatable'):

4145

continue

4146

for trans_code, trans_name in translation_languages.items():

4147

if not trans_code:

4148

continue

4149

orig_trans_code = trans_code

4150

if caption_track.get('kind') != 'asr' and trans_code != 'und':

4151

if not get_translated_subs:

4152

continue

4153

trans_code += f'-{lang_code}'

4154

trans_name += format_field(lang_name, None, ' from %s')

4155

# Add an "-orig" label to the original language so that it can be distinguished.

4156

# The subs are returned without "-orig" as well for compatibility

4157

if lang_code == f'a-{orig_trans_code}':

4158

process_language(

4159

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

4160

# Setting tlang=lang returns damaged subtitles.

4161

process_language(automatic_captions, base_url, trans_code, trans_name,

4162

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

4163

4164

info['automatic_captions'] = automatic_captions

4165

info['subtitles'] = subtitles

4166

4167

parsed_url = urllib.parse.urlparse(url)

4168

for component in [parsed_url.fragment, parsed_url.query]:

4169

query = urllib.parse.parse_qs(component)

4170

for k, v in query.items():

4171

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

4172

d_k += '_time'

4173

if d_k not in info and k in s_ks:

4174

info[d_k] = parse_duration(query[k][0])

4175

4176

# Youtube Music Auto-generated description

4177

if video_description:

4178

mobj = re.search(

4179

r'''(?xs)

4180

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

4181

(?P<album>[^\n]+)

4182

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

4183

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

4184

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

4185

.+\nAuto-generated\ by\ YouTube\.\s*$

4186

''', video_description)

4187

if mobj:

4188

release_year = mobj.group('release_year')

4189

release_date = mobj.group('release_date')

4190

if release_date:

4191

release_date = release_date.replace('-', '')

4192

if not release_year:

4193

release_year = release_date[:4]

4194

info.update({

4195

'album': mobj.group('album'.strip()),

4196

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

4197

'track': mobj.group('track').strip(),

4198

'release_date': release_date,

4199

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

4205

if not initial_data:

4206

query = {'videoId': video_id}

4207

query.update(self._get_checkok_params())

4208

initial_data = self._extract_response(

4209

item_id=video_id, ep='next', fatal=False,

4210

ytcfg=master_ytcfg, query=query,

4211

headers=self.generate_api_headers(ytcfg=master_ytcfg),

4212

note='Downloading initial data API JSON')

4213

4214

info['comment_count'] = traverse_obj(initial_data, (

4215

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

4216

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

4217

), (

4218

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

4219

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

4220

), expected_type=int_or_none, get_all=False)

4221

4222

try: # This will error if there is no livechat

4223

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

4224

except (KeyError, IndexError, TypeError):

4225

pass

4226

else:

4227

info.setdefault('subtitles', {})['live_chat'] = [{

4228

# url is needed to set cookies

4229

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

4230

'video_id': video_id,

4231

'ext': 'json',

4232

'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')

4233

else 'youtube_live_chat_replay'),

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

4239

or self._extract_chapters_from_engagement_panel(initial_data, duration)

4240

or self._extract_chapters_from_description(video_description, duration)

4241

or None)

4242

4243

contents = traverse_obj(

4244

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

4245

expected_type=list, default=[])

4246

4247

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

4248

if vpir:

4249

stl = vpir.get('superTitleLink')

4250

if stl:

4251

stl = self._get_text(stl)

4252

if try_get(

4253

vpir,

4254

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

4255

info['location'] = stl

4256

else:

4257

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

4258

if mobj:

4259

info.update({

4260

'series': mobj.group(1),

4261

'season_number': int(mobj.group(2)),

4262

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

list) or []):

tbrs = variadic(

traverse_obj(

tlb, 'toggleButtonRenderer',

4271

('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),

4272

default=[]))

4273

for tbr in tbrs:

4274

for getter, regex in [(

4275

lambda x: x['defaultText']['accessibility']['accessibilityData'],

4276

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

4277

lambda x: x['accessibility'],

4278

lambda x: x['accessibilityData']['accessibilityData'],

4279

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

4280

label = (try_get(tbr, getter, dict) or {}).get('label')

4281

if label:

4282

mobj = re.match(regex, label)

4283

if mobj:

4284

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

4285

break

4286

sbr_tooltip = try_get(

4287

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

4288

if sbr_tooltip:

4289

like_count, dislike_count = sbr_tooltip.split(' / ')

4290

info.update({

4291

'like_count': str_to_int(like_count),

4292

'dislike_count': str_to_int(dislike_count),

4293

})

4294

vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))

4295

if vcr:

4296

vc = self._get_count(vcr, 'viewCount')

4297

# Upcoming premieres with waiting count are treated as live here

4298

if vcr.get('isLive'):

4299

info['concurrent_view_count'] = vc

4300

elif info.get('view_count') is None:

4301

info['view_count'] = vc

4302

4303

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

4304

if vsir:

4305

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

4306

info.update({

4307

'channel': self._get_text(vor, 'title'),

4308

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

4313

list) or []

4314

multiple_songs = False

4315

for row in rows:

4316

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

4317

multiple_songs = True

4318

break

4319

for row in rows:

4320

mrr = row.get('metadataRowRenderer') or {}

4321

mrr_title = mrr.get('title')

4322

if not mrr_title:

4323

continue

4324

mrr_title = self._get_text(mrr, 'title')

4325

mrr_contents_text = self._get_text(mrr, ('contents', 0))

4326

if mrr_title == 'License':

4327

info['license'] = mrr_contents_text

4328

elif not multiple_songs:

4329

if mrr_title == 'Album':

4330

info['album'] = mrr_contents_text

4331

elif mrr_title == 'Artist':

4332

info['artist'] = mrr_contents_text

4333

elif mrr_title == 'Song':

4334

info['track'] = mrr_contents_text

4335

4336

fallbacks = {

4337

'channel': 'uploader',

4338

'channel_id': 'uploader_id',

4339

'channel_url': 'uploader_url',

4340

}

4341

4342

# The upload date for scheduled, live and past live streams / premieres in microformats

4343

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

4344

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

4345

upload_date = (

4346

unified_strdate(get_first(microformats, 'uploadDate'))

4347

or unified_strdate(search_meta('uploadDate')))

4348

if not upload_date or (

4349

live_status in ('not_live', None)

4350

and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])

4351

):

4352

upload_date = strftime_or_none(

4353

self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date

4354

info['upload_date'] = upload_date

4355

4356

for to, frm in fallbacks.items():

4357

if not info.get(to):

4358

info[to] = info.get(frm)

4359

4360

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))

4366

4367

is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)

4368

or get_first(video_details, 'isPrivate', expected_type=bool))

4369

4370

info['availability'] = (

4371

'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)

4372

else self._availability(

4373

is_private=is_private,

4374

needs_premium=(

4375

self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)

4376

or False if initial_data and is_private is not None else None),

4377

needs_subscription=(

4378

self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)

4379

or False if initial_data and is_private is not None else None),

4380

needs_auth=info['age_limit'] >= 18,

4381

is_unlisted=None if is_private is None else (

4382

self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)

4383

or get_first(microformats, 'isUnlisted', expected_type=bool))))

4384

4385

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

4386

4387

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

4393

@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method so that smuggled data survives into its results.

    The wrapped ``func`` is called as ``func(self, url, smuggled_data)`` with the
    data unsmuggled from ``url``. Any ``url``/``url_transparent`` result (the
    returned info dict itself and each of its ``entries``) then gets the same data
    smuggled back into its ``url``.
    """
    forwardable_types = ('url', 'url_transparent')
    youtube_hosts = ('www.youtube.com', 'music.youtube.com')

    def _smuggle(info, smuggled_data):
        # Only results that are re-dispatched to another extractor carry the data on
        if info.get('_type') not in forwardable_types:
            return info
        if smuggled_data.get('is_music_url'):
            parts = urllib.parse.urlparse(info['url'])
            if parts.netloc in youtube_hosts:
                # The music flag is expressed through the hostname instead of the smuggled payload
                smuggled_data.pop('is_music_url')
                music_parts = parts._replace(netloc='music.youtube.com')
                info['url'] = urllib.parse.urlunparse(music_parts)
        if smuggled_data:
            info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = func(self, url, smuggled_data)
        if not smuggled_data:
            return info_dict
        _smuggle(info_dict, smuggled_data)
        if info_dict.get('entries'):
            # Kept lazy; every entry receives its own copy since _smuggle() may pop keys
            info_dict['entries'] = (
                _smuggle(entry, smuggled_data.copy()) for entry in info_dict['entries'])
        return info_dict

    return wrapper

def _extract_channel_id(self, webpage):

4421

channel_id = self._html_search_meta(

4422

'channelId', webpage, 'channel id', default=None)

4423

if channel_id:

4424

return channel_id

4425

channel_url = self._html_search_meta(

4426

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

4427

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

4428

'twitter:app:url:googleplay'), webpage, 'channel url')

4429

return self._search_regex(

4430

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

4431

channel_url, 'channel id')

4432

4433

@staticmethod

4434

def _extract_basic_item_renderer(item):

4435

# Modified from _extract_grid_item_renderer

4436

known_basic_renderers = (

4437

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

4438

)

4439

for key, renderer in item.items():

4440

if not isinstance(renderer, dict):

4441

continue

4442

elif key in known_basic_renderers:

4443

return renderer

4444

elif key.startswith('grid') and key.endswith('Renderer'):

4445

return renderer

4446

4447

def _extract_channel_renderer(self, renderer):
    """Build a ``url`` result pointing at the channel described by ``renderer``.

    ``renderer['channelId']`` is required (KeyError otherwise); the remaining
    fields are filled from the renderer through the extractor's helper methods.
    """
    channel_id = renderer['channelId']
    title = self._get_text(renderer, 'title')
    channel_url = f'https://www.youtube.com/channel/{channel_id}'

    result = {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': title,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': title,
    }
    # Optional metadata, looked up in the same order as the keys are emitted
    result['channel_follower_count'] = self._get_count(renderer, 'subscriberCountText')
    result['thumbnails'] = self._extract_thumbnails(renderer, 'thumbnail')
    result['playlist_count'] = self._get_count(renderer, 'videoCountText')
    result['description'] = self._get_text(renderer, 'descriptionSnippet')
    return result

4465

4466

def _grid_entries(self, grid_renderer):

4467

for item in grid_renderer['items']:

4468

if not isinstance(item, dict):

4469

continue

4470

renderer = self._extract_basic_item_renderer(item)

4471

if not isinstance(renderer, dict):

4472

continue

4473

title = self._get_text(renderer, 'title')

4474

4475

# playlist

4476

playlist_id = renderer.get('playlistId')

4477

if playlist_id:

4478

yield self.url_result(

4479

'https://www.youtube.com/playlist?list=%s' % playlist_id,

4480

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

4485

if video_id:

4486

yield self._extract_video(renderer)

4487

continue

4488

# channel

4489

channel_id = renderer.get('channelId')

4490

if channel_id:

4491

yield self._extract_channel_renderer(renderer)

4492

continue

4493

# generic endpoint URL support

4494

ep_url = urljoin('https://www.youtube.com/', try_get(

4495

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

4496

str))

4497

if ep_url:

4498

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

4499

if ie.suitable(ep_url):

4500

yield self.url_result(

4501

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

4502

break

4503

4504

def _music_reponsive_list_entry(self, renderer):
    """Turn a music list item renderer into a ``music.youtube.com`` URL result.

    Checked in order: a playlist item's own video ID, a watch endpoint carrying
    a playlist ID (optionally together with a video ID), and a browse endpoint.
    Returns None when none of them is present.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={item_video_id}',
                               ie=YoutubeIE.ie_key(), video_id=item_video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        # Either way the result is identified by the playlist ID
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)

4521

4522

def _shelf_entries_from_content(self, shelf_renderer):

4523

content = shelf_renderer.get('content')

4524

if not isinstance(content, dict):

4525

return

4526

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

4527

if renderer:

4528

# TODO: add support for nested playlists so each shelf is processed

4529

# as separate playlist

4530

# TODO: this includes only first N items

4531

yield from self._grid_entries(renderer)

4532

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's endpoint, then the entries in its content.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?`` yields
    nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = skip_channels and '/channels?' in shelf_url
        if links_to_channels:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

4552

4553

def _playlist_entries(self, video_list_renderer):

4554

for content in video_list_renderer['contents']:

4555

if not isinstance(content, dict):

4556

continue

4557

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

4558

if not isinstance(renderer, dict):

4559

continue

4560

video_id = renderer.get('videoId')

4561

if not video_id:

4562

continue

4563

yield self._extract_video(renderer)

4564

4565

def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in a rich item's ``content``, if it has a video ID.

    The first of ``videoRenderer`` / ``reelItemRenderer`` found under
    ``content`` is used.
    """
    video_renderer = traverse_obj(
        rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4572

4573

def _video_entry(self, video_renderer):

4574

video_id = video_renderer.get('videoId')

4575

if video_id:

4576

return self._extract_video(video_renderer)

4577

4578

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for the page a hashtag tile links to, or None without a URL."""
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_path)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4584

4585

def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a backstage (community) post.

    In order: the attached video, the attached playlist, and every link in the
    post text that ``YoutubeIE`` accepts (except a link to the attached video).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        # The attached video has already been yielded above
        if linked_video_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)

4620

4621

def _post_thread_continuation_entries(self, post_thread_continuation):

4622

contents = post_thread_continuation.get('contents')

4623

if not isinstance(contents, list):

4624

return

4625

for content in contents:

4626

renderer = content.get('backstagePostThreadRenderer')

4627

if isinstance(renderer, dict):

4628

yield from self._post_thread_entries(renderer)

4629

continue

4630

renderer = content.get('videoRenderer')

4631

if isinstance(renderer, dict):

4632

yield self._video_entry(renderer)

4633

4634

r''' # unused

4635

def _rich_grid_entries(self, contents):

4636

for content in contents:

4637

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4638

if video_renderer:

4639

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _report_history_entries(self, renderer):
    """Yield a ``YoutubeIE`` URL result for every link found in a report-history table."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, link_path):
        absolute_url = urljoin('https://www.youtube.com', relative_url)
        yield self.url_result(absolute_url, YoutubeIE)

4650

4651

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries under ``parent_renderer['contents']``.

    ``continuation_list`` is an output parameter: it is reset to ``[None]`` on
    entry and its single slot receives the continuation extracted while walking
    the renderers, so the caller can read it once the generator is exhausted.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Renderer key -> callable returning an iterable of entries (falsy entries are dropped)
    entry_extractors = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)

        if not is_renderer:
            # Not a section: the content itself may directly be a rich item or a report-history table
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for isr_content in try_get(is_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(isr_content, dict):
                continue
            # Only the first recognised renderer of each item is processed
            known_key = next((key for key in isr_content if key in entry_extractors), None)
            if known_key is None:
                continue
            renderer = isr_content[known_key]
            for entry in entry_extractors[known_key](renderer):
                if entry:
                    yield entry
            continuation_list[0] = self._extract_continuation(renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4703

4704

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of ``tab``, following continuations page by page.

    The tab's own content is processed first. After that, continuation
    responses are requested until no continuation is left, a request returns
    nothing, or a response holds no recognised renderer.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Looks up `continuation_list` at call time, so it follows the rebinding done in the loop below
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Key of the first continuation item -> (entry extractor, key to wrap the items in, or None)
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item:
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        if not video_items_renderer:
            break

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4769

for tab_renderer in tabs:

4770

if tab_renderer.get('selected'):

4771

return tab_renderer

4772

if fatal:

4773

raise ExtractorError('Unable to find selected tab')

4774

4775

@staticmethod
def _extract_tab_renderers(response):
    """Collect the ``tabRenderer`` / ``expandableTabRenderer`` dicts of a browse response."""
    tab_renderer_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_renderer_path, expected_type=dict)

4779

4780

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a tabbed page.

    The playlist title is the page title with the selected tab's ``title`` and
    ``expandedText`` (when present) appended as `` - <text>`` suffixes.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)
    selected_tab = self._extract_selected_tab(tabs)

    for suffix_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, suffix_field, ' - %s')

    entries = self._entries(
        selected_tab, metadata['id'], ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4793

4794

def _extract_metadata_from_tabs(self, item_id, data):
    """Collect playlist/channel level metadata from a tabbed page response.

    Returns an info dict that starts as ``{'id': item_id}`` and is filled with
    title, availability, thumbnails, counts, modification date and the
    uploader/channel fields. When channel metadata provides an ``externalId``,
    that value replaces ``item_id`` as the ``id``.
    """
    info = {'id': item_id}

    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if not metadata_renderer:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
    else:
        info.update({
            'uploader': metadata_renderer.get('title'),
            'uploader_id': metadata_renderer.get('externalId'),
            'uploader_url': metadata_renderer.get('channelUrl'),
        })
        if info['uploader_id']:
            info['id'] = info['uploader_id']

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # Derives the uncropped URL from the first thumbnail and adds it to the same list
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info.update({
        'title': (traverse_obj(metadata_renderer, 'title')
                  or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
                  or info['id']),
        'availability': self._extract_availability(data),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_text = (
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(self._parse_time_text(last_updated_text), '%Y%m%d')

    view_count = self._get_count(playlist_stats, 1)
    if view_count is None:  # 0 is allowed
        view_count = self._get_count(playlist_header_renderer, 'viewCountText')
    info['view_count'] = view_count

    playlist_count = self._get_count(playlist_stats, 0)
    if playlist_count is None:  # 0 is allowed
        playlist_count = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
    info['playlist_count'] = playlist_count

    if not info.get('uploader_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'uploader_id': browse_ep.get('browseId'),
            'uploader_url': urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))
        })

    # The channel fields mirror the uploader fields
    info.update({
        'channel': info['uploader'],
        'channel_id': info['uploader_id'],
        'channel_url': info['uploader_url']
    })
    return info

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline (watch page panel) playlist, page by page.

    For every page, the entries following the last one already yielded are
    emitted, then the 'next' endpoint is queried (anchored at the last video
    of the current panel) to obtain the following panel.

    @param playlist: playlist panel dict of the current page
    @param playlist_id: playlist ID sent in each continuation query
    @param data: initial response, passed on when building API headers
    @param ytcfg: ytcfg passed on when building API headers and requests
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Panels may overlap: resume right after the last entry already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last panel item is not guaranteed to be a playlistPanelVideoRenderer with a
        # watchEndpoint; fall back to an empty dict so the defaults below are used
        # instead of calling .get() on None
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No playlist panel in the response: nothing further to extract
            return

4927

4928

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist panel found on a watch page.

    Returns a url_result pointing at the playlist's own page (handled by
    YoutubeTabIE) when such a page is linked, differs from `url` and the
    playlist ID is not a known-unviewable one; otherwise returns a
    playlist_result built from the inline panel entries.

    @param item_id: fallback ID when the panel carries no 'playlistId'
    @param url: URL being extracted; base for resolving the playlist page URL
    @param data: response containing the panel (fallback source for the title)
    @param playlist: playlist panel dict
    @param ytcfg: ytcfg passed on to the inline playlist extraction
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    page_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, page_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not known_unviewable
    if not should_delegate:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)

4950

4951

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns 'public' when a public badge, header privacy or privacy icon says so;
             otherwise whatever self._availability derives from the collected flags
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(sidebar_renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    # NOTE: the parentheses around the fallback chains are required. A conditional
    # expression binds looser than `or`, so `badge or a if cond else b` would be read as
    # `(badge or a) if cond else b` and drop the badge whenever the header has no privacy value.
    # Each flag stays None (unknown) when no source provides an answer.
    is_private = self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or (
        player_header_privacy == 'PRIVATE' if player_header_privacy is not None
        else privacy_setting_icon == 'PRIVACY_PRIVATE' if privacy_setting_icon is not None
        else None)
    is_unlisted = self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or (
        player_header_privacy == 'UNLISTED' if player_header_privacy is not None
        else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None
        else microformats_is_unlisted)

    return self._availability(
        is_private=is_private,
        is_unlisted=is_unlisted,
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Find the first non-empty `info_renderer` entry among the playlist sidebar items.

    @param data: response; items are read from data['sidebar']['playlistSidebarRenderer']['items']
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the looked-up value must have (passed to try_get)
    @returns the first truthy match, or None when there is none
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    matches = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next(filter(None, matches), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Re-request a playlist so that unavailable videos (e.g. private videos,
    region blocked, etc.) are included.

    @param item_id: playlist ID; the browse ID requested is 'VL' + item_id
    @param data: current response; only reloaded if it carries playlist metadata or a playlist header
    @param ytcfg: ytcfg passed on when building API headers and the request
    @returns the new response (request is non-fatal), or None if `data` is not a playlist
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return None

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)

    return self._extract_response(
        item_id=item_id, headers=headers,
        query={'params': 'wgYCCAA=', 'browseId': f'VL{item_id}'},
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')

5020

5021

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE (cached per instance)"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped

5024

5025

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and parse its initial data, retrying through self.RetryManager.

    Network errors are retried, except HTTP 403/429 which are reported right away
    like any other extraction error. A page whose initial data has none of
    'contents', 'currentVideoEndpoint' or 'onResponseReceivedActions' is retried as well.

    @param url: page URL
    @param item_id: ID used for the download and for parsing the initial data
    @param fatal: passed to the retry manager, the initial data parser and error reporting
    @returns (webpage, data); both keep their initial value None if the download never succeeds
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            rejected_by_server = isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429)
            if isinstance(cause, network_exceptions) and not rejected_by_server:
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        usable = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not usable:
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when `ytcfg` is available or the session is not authenticated.
    Otherwise raises ExtractorError if `fatal` is set and 'authcheck' is not in the
    'skip' extractor argument of YoutubeTabIE; in all remaining cases only warns (once).
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    check_enabled = 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if check_enabled and fatal:
        hint = (
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check')
        raise ExtractorError(hint, expected=True)
    self.report_warning(msg, only_once=True)

5064

5065

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the initial data and ytcfg for a tab/playlist URL.

    Unless the webpage is skipped (see skip_webpage), the webpage is tried first.
    If that yields no data, the URL is resolved through the API instead.

    @param url: URL to extract
    @param item_id: ID used for requests and messages
    @param ytcfg: known ytcfg, if any; otherwise extracted from the webpage
    @param fatal: whether a home page redirect / API failure raises instead of warning
    @param webpage_fatal: `fatal` value used for the webpage download only
    @param default_client: client passed on to the API fallback
    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)

        # Detect a redirect to the home page that was not explicitly requested
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    # No usable webpage data: fall back to the API
    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

5083

5084

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API endpoint and fetch the
    response of the endpoint it maps to ('browse' for a browseEndpoint, 'next' for
    a watchEndpoint; checked in that order).

    @param url: URL to resolve
    @param item_id: ID used for requests and messages
    @param ytcfg: ytcfg used for headers and requests
    @param fatal: raise ExtractorError if the URL cannot be resolved, else only warn
    @param default_client: client used for headers and requests
    @returns the endpoint response, or None if nothing could be resolved and not fatal
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)

5101

5102

# Default for the 'params' field of search requests: _search_results falls back to
# this value when it is called without `params`; a falsy value means no 'params' is sent
_SEARCH_PARAMS = None

5103

5104

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, requesting the 'search' endpoint page by page.

    @param query: search query text
    @param params: value for the request's 'params' field; defaults to self._SEARCH_PARAMS.
                   A falsy value sends no 'params'
    @param default_client: client used for the ytcfg download, headers and requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request_data = {'query': query}
    if params:
        request_data['params'] = params

    # Known locations of the result list in a search response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    # Distinct top-level keys of the paths above
    check_get_keys = tuple(set(path[0] for path in content_keys))
    display_id = f'query "{query}"'

    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-slot holder handed to _extract_entries; presumably it stores the next
    # continuation in slot 0 - verify against _extract_entries
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        request_data.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=request_data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        page_contents = list(variadic(traverse_obj(search, *content_keys)))
        yield from self._extract_entries({'contents': page_contents}, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

5139

IE_DESC = 'YouTube Tabs'

5140

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

5149

(?P<not_channel>

5150

feed/|hashtag/|

5151

(?:playlist|watch)\?.*?\blist=

5152

)|

5153

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

5158

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

5159

}

5160

IE_NAME = 'youtube:tab'

5161

5162

_TESTS = [{

5163

'note': 'playlists, multipage',

5164

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

5165

'playlist_mincount': 94,

5166

'info_dict': {

5167

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5168

'title': 'Igor Kleiner - Playlists',

5169

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

5170

'uploader': 'Igor Kleiner',

5171

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5172

'channel': 'Igor Kleiner',

5173

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5174

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

5175

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5176

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5177

'channel_follower_count': int

5178

},

5179

}, {

5180

'note': 'playlists, multipage, different order',

5181

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

5182

'playlist_mincount': 94,

5183

'info_dict': {

5184

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5185

'title': 'Igor Kleiner - Playlists',

5186

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

5187

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5188

'uploader': 'Igor Kleiner',

5189

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5190

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

5191

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5192

'channel': 'Igor Kleiner',

5193

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5194

'channel_follower_count': int

5195

},

5196

}, {

5197

'note': 'playlists, series',

5198

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

5199

'playlist_mincount': 5,

5200

'info_dict': {

5201

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5202

'title': '3Blue1Brown - Playlists',

5203

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5204

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5205

'uploader': '3Blue1Brown',

5206

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5207

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5208

'channel': '3Blue1Brown',

5209

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5210

'tags': ['Mathematics'],

5211

'channel_follower_count': int

5212

},

5213

}, {

5214

'note': 'playlists, singlepage',

5215

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

5216

'playlist_mincount': 4,

5217

'info_dict': {

5218

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5219

'title': 'ThirstForScience - Playlists',

5220

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

5221

'uploader': 'ThirstForScience',

5222

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5223

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5224

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5225

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5226

'tags': 'count:13',

5227

'channel': 'ThirstForScience',

5228

'channel_follower_count': int

5229

}

5230

}, {

5231

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

5232

'only_matching': True,

5233

}, {

5234

'note': 'basic, single video playlist',

5235

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5236

'info_dict': {

5237

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5238

'uploader': 'Sergey M.',

5239

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5240

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

5245

'channel': 'Sergey M.',

5246

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5247

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5248

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5249

'availability': 'public',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

5254

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5255

'info_dict': {

5256

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5257

'uploader': 'Sergey M.',

5258

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5259

'title': 'youtube-dl empty playlist',

5260

'tags': [],

5261

'channel': 'Sergey M.',

5262

'description': '',

5263

'modified_date': '20160902',

5264

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5265

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5266

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5267

'availability': 'public',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

5273

'info_dict': {

5274

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5275

'title': 'lex will - Home',

5276

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5277

'uploader': 'lex will',

5278

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5279

'channel': 'lex will',

5280

'tags': ['bible', 'history', 'prophesy'],

5281

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5282

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5283

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5284

'channel_follower_count': int

5285

},

5286

'playlist_mincount': 2,

5287

}, {

5288

'note': 'Videos tab',

5289

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

5290

'info_dict': {

5291

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5292

'title': 'lex will - Videos',

5293

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5294

'uploader': 'lex will',

5295

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5296

'tags': ['bible', 'history', 'prophesy'],

5297

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5298

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5299

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5300

'channel': 'lex will',

5301

'channel_follower_count': int

5302

},

5303

'playlist_mincount': 975,

5304

}, {

5305

'note': 'Videos tab, sorted by popular',

5306

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

5307

'info_dict': {

5308

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5309

'title': 'lex will - Videos',

5310

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5311

'uploader': 'lex will',

5312

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5313

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5314

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5315

'channel': 'lex will',

5316

'tags': ['bible', 'history', 'prophesy'],

5317

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5318

'channel_follower_count': int

5319

},

5320

'playlist_mincount': 199,

5321

}, {

5322

'note': 'Playlists tab',

5323

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

5324

'info_dict': {

5325

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5326

'title': 'lex will - Playlists',

5327

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5328

'uploader': 'lex will',

5329

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5330

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5331

'channel': 'lex will',

5332

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5333

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5334

'tags': ['bible', 'history', 'prophesy'],

5335

'channel_follower_count': int

5336

},

5337

'playlist_mincount': 17,

5338

}, {

5339

'note': 'Community tab',

5340

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

5341

'info_dict': {

5342

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5343

'title': 'lex will - Community',

5344

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5345

'uploader': 'lex will',

5346

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5347

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5348

'channel': 'lex will',

5349

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5350

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5351

'tags': ['bible', 'history', 'prophesy'],

5352

'channel_follower_count': int

5353

},

5354

'playlist_mincount': 18,

5355

}, {

5356

'note': 'Channels tab',

5357

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

5358

'info_dict': {

5359

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5360

'title': 'lex will - Channels',

5361

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5362

'uploader': 'lex will',

5363

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5364

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5365

'channel': 'lex will',

5366

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5367

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5368

'tags': ['bible', 'history', 'prophesy'],

5369

'channel_follower_count': int

5370

},

5371

'playlist_mincount': 12,

5372

}, {

5373

'note': 'Search tab',

5374

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

5375

'playlist_mincount': 40,

5376

'info_dict': {

5377

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5378

'title': '3Blue1Brown - Search - linear algebra',

5379

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5380

'uploader': '3Blue1Brown',

5381

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5382

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5383

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5384

'tags': ['Mathematics'],

5385

'channel': '3Blue1Brown',

5386

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5387

'channel_follower_count': int

5388

},

5389

}, {

5390

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5391

'only_matching': True,

5392

}, {

5393

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5394

'only_matching': True,

5395

}, {

5396

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5397

'only_matching': True,

5398

}, {

5399

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

5400

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5401

'info_dict': {

5402

'title': '29C3: Not my department',

5403

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5404

'uploader': 'Christiaan008',

5405

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5406

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

5407

'tags': [],

5408

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5409

'view_count': int,

5410

'modified_date': '20150605',

5411

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5412

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5413

'channel': 'Christiaan008',

5414

'availability': 'public',

5415

},

5416

'playlist_count': 96,

5417

}, {

5418

'note': 'Large playlist',

5419

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

5420

'info_dict': {

5421

'title': 'Uploads from Cauchemar',

5422

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

5423

'uploader': 'Cauchemar',

5424

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5425

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

5426

'tags': [],

5427

'modified_date': r're:\d{8}',

5428

'channel': 'Cauchemar',

5429

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

5430

'view_count': int,

5431

'description': '',

5432

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5433

'availability': 'public',

5434

},

5435

'playlist_mincount': 1123,

5436

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5437

}, {

5438

'note': 'even larger playlist, 8832 videos',

5439

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

5440

'only_matching': True,

5441

}, {

5442

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

5443

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

5444

'info_dict': {

5445

'title': 'Uploads from Interstellar Movie',

5446

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

5447

'uploader': 'Interstellar Movie',

5448

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5449

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

5450

'tags': [],

5451

'view_count': int,

5452

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5453

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

5454

'channel': 'Interstellar Movie',

5455

'description': '',

5456

'modified_date': r're:\d{8}',

5457

'availability': 'public',

5458

},

5459

'playlist_mincount': 21,

5460

}, {

5461

'note': 'Playlist with "show unavailable videos" button',

5462

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

5463

'info_dict': {

5464

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

5465

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

5466

'uploader': 'Phim Siêu Nhân Nhật Bản',

5467

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5468

'view_count': int,

5469

'channel': 'Phim Siêu Nhân Nhật Bản',

5470

'tags': [],

5471

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5472

'description': '',

5473

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5474

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5475

'modified_date': r're:\d{8}',

5476

'availability': 'public',

5477

},

5478

'playlist_mincount': 200,

5479

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5480

}, {

5481

'note': 'Playlist with unavailable videos in page 7',

5482

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

5483

'info_dict': {

5484

'title': 'Uploads from BlankTV',

5485

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

5486

'uploader': 'BlankTV',

5487

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5488

'channel': 'BlankTV',

5489

'channel_url': 'https://www.youtube.com/c/blanktv',

5490

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5491

'view_count': int,

5492

'tags': [],

5493

'uploader_url': 'https://www.youtube.com/c/blanktv',

5494

'modified_date': r're:\d{8}',

5495

'description': '',

5496

'availability': 'public',

5497

},

5498

'playlist_mincount': 1000,

5499

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5500

}, {

5501

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

5502

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5503

'info_dict': {

5504

'title': 'Data Analysis with Dr Mike Pound',

5505

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5506

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5507

'uploader': 'Computerphile',

5508

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

5509

'uploader_url': 'https://www.youtube.com/user/Computerphile',

5510

'tags': [],

5511

'view_count': int,

5512

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5513

'channel_url': 'https://www.youtube.com/user/Computerphile',

5514

'channel': 'Computerphile',

5515

'availability': 'public',

5516

'modified_date': '20190712',

5517

},

5518

'playlist_mincount': 11,

5519

}, {

5520

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5521

'only_matching': True,

5522

}, {

5523

'note': 'Playlist URL that does not actually serve a playlist',

5524

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

5529

'uploader': 'STREEM',

5530

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

5531

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

5532

'upload_date': '20150526',

5533

'license': 'Standard YouTube License',

5534

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

5535

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5542

},

5543

'skip': 'This video is not available.',

5544

'add_ie': [YoutubeIE.ie_key()],

5545

}, {

5546

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5547

'only_matching': True,

5548

}, {

5549

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5550

'only_matching': True,

5551

}, {

5552

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5553

'info_dict': {

5554

'id': 'Wq15eF5vCbI', # This will keep changing

5555

'ext': 'mp4',

5556

'title': str,

5557

'uploader': 'Sky News',

5558

'uploader_id': 'skynews',

5559

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5560

'upload_date': r're:\d{8}',

5561

'description': str,

5562

'categories': ['News & Politics'],

5563

'tags': list,

5564

'like_count': int,

5565

'release_timestamp': int,

5566

'channel': 'Sky News',

5567

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5568

'age_limit': 0,

5569

'view_count': int,

5570

'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',

5571

'playable_in_embed': True,

5572

'release_date': r're:\d+',

5573

'availability': 'public',

5574

'live_status': 'is_live',

5575

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5576

'channel_follower_count': int,

5577

'concurrent_view_count': int,

5578

},

5579

'params': {

5580

'skip_download': True,

5581

},

5582

'expected_warnings': ['Ignoring subtitle tracks found in '],

5583

}, {

5584

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5589

'uploader': 'The Young Turks',

5590

'uploader_id': 'TheYoungTurks',

5591

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5592

'upload_date': '20150715',

5593

'license': 'Standard YouTube License',

5594

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5595

'categories': ['News & Politics'],

5596

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5601

},

5602

'only_matching': True,

5603

}, {

5604

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5605

'only_matching': True,

5606

}, {

5607

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5608

'only_matching': True,

5609

}, {

5610

'note': 'A channel that is not live. Should raise error',

5611

'url': 'https://www.youtube.com/user/numberphile/live',

5612

'only_matching': True,

5613

}, {

5614

'url': 'https://www.youtube.com/feed/trending',

5615

'only_matching': True,

5616

}, {

5617

'url': 'https://www.youtube.com/feed/library',

5618

'only_matching': True,

5619

}, {

5620

'url': 'https://www.youtube.com/feed/history',

5621

'only_matching': True,

5622

}, {

5623

'url': 'https://www.youtube.com/feed/subscriptions',

5624

'only_matching': True,

5625

}, {

5626

'url': 'https://www.youtube.com/feed/watch_later',

5627

'only_matching': True,

5628

}, {

5629

'note': 'Recommended - redirects to home page.',

5630

'url': 'https://www.youtube.com/feed/recommended',

5631

'only_matching': True,

5632

}, {

5633

'note': 'inline playlist with not always working continuations',

5634

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5635

'only_matching': True,

5636

}, {

5637

'url': 'https://www.youtube.com/course',

5638

'only_matching': True,

5639

}, {

5640

'url': 'https://www.youtube.com/zsecurity',

5641

'only_matching': True,

5642

}, {

5643

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5644

'only_matching': True,

5645

}, {

5646

'url': 'https://www.youtube.com/TheYoungTurks/live',

5647

'only_matching': True,

5648

}, {

5649

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 300, # not consistent but should be over 300

5656

}, {

5657

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5658

'only_matching': True,

5659

}, {

5660

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5661

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5662

'only_matching': True

5663

}, {

5664

'note': '/browse/ should redirect to /channel/',

5665

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5666

'only_matching': True

5667

}, {

5668

'note': 'VLPL, should redirect to playlist?list=PL...',

5669

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5670

'info_dict': {

5671

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5672

'uploader': 'NoCopyrightSounds',

5673

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5674

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5675

'title': 'NCS : All Releases 💿',

5676

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5677

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5678

'modified_date': r're:\d{8}',

5679

'view_count': int,

5680

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5681

'tags': [],

5682

'channel': 'NoCopyrightSounds',

5683

'availability': 'public',

5684

},

5685

'playlist_mincount': 166,

5686

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5687

}, {

5688

'note': 'Topic, should redirect to playlist?list=UU...',

5689

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5690

'info_dict': {

5691

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5692

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5693

'title': 'Uploads from Royalty Free Music - Topic',

5694

'uploader': 'Royalty Free Music - Topic',

5695

'tags': [],

5696

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5697

'channel': 'Royalty Free Music - Topic',

5698

'view_count': int,

5699

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5700

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5701

'modified_date': r're:\d{8}',

5702

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5703

'description': '',

5704

'availability': 'public',

5705

},

5706

'playlist_mincount': 101,

5707

}, {

5708

# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)

5709

# Treat as a general feed

5710

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5711

'info_dict': {

5712

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5713

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5714

'tags': [],

5715

},

5716

'playlist_mincount': 9,

5717

}, {

5718

'note': 'Youtube music Album',

5719

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5720

'info_dict': {

5721

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5722

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5727

'modified_date': r're:\d{8}',

5728

},

5729

'playlist_count': 50,

5730

}, {

5731

'note': 'unlisted single video playlist',

5732

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5733

'info_dict': {

5734

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5735

'uploader': 'colethedj',

5736

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5737

'title': 'yt-dlp unlisted playlist test',

5738

'availability': 'unlisted',

5739

'tags': [],

5740

'modified_date': '20220418',

5741

'channel': 'colethedj',

5742

'view_count': int,

5743

'description': '',

5744

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5745

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5746

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5751

'url': 'https://www.youtube.com/feed/recommended',

5752

'info_dict': {

5753

'id': 'recommended',

5754

'title': 'recommended',

5755

'tags': [],

5756

},

5757

'playlist_mincount': 50,

5758

'params': {

5759

'skip_download': True,

5760

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5761

},

5762

}, {

5763

'note': 'API Fallback: /videos tab, sorted by oldest first',

5764

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5765

'info_dict': {

5766

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5767

'title': 'Cody\'sLab - Videos',

5768

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5769

'uploader': 'Cody\'sLab',

5770

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5771

'channel': 'Cody\'sLab',

5772

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5773

'tags': [],

5774

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5775

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5776

'channel_follower_count': int

5777

},

5778

'playlist_mincount': 650,

5779

'params': {

5780

'skip_download': True,

5781

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5782

},

5783

'skip': 'Query for sorting no longer works',

5784

}, {

5785

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5786

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5787

'info_dict': {

5788

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5789

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5790

'title': 'Uploads from Royalty Free Music - Topic',

5791

'uploader': 'Royalty Free Music - Topic',

5792

'modified_date': r're:\d{8}',

5793

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5794

'description': '',

5795

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5796

'tags': [],

5797

'channel': 'Royalty Free Music - Topic',

5798

'view_count': int,

5799

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5800

'availability': 'public',

5801

},

5802

'playlist_mincount': 101,

5803

'params': {

5804

'skip_download': True,

5805

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5806

},

5807

}, {

5808

'note': 'non-standard redirect to regional channel',

5809

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5810

'only_matching': True

5811

}, {

5812

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5813

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5814

'info_dict': {

5815

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5816

'modified_date': '20220407',

5817

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5818

'tags': [],

5819

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5820

'uploader': 'pukkandan',

5821

'availability': 'unlisted',

5822

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5823

'channel': 'pukkandan',

5824

'description': 'Test for collaborative playlist',

5825

'title': 'yt-dlp test - collaborative playlist',

5826

'view_count': int,

5827

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5828

},

5829

'playlist_mincount': 2

5830

}, {

5831

'note': 'translated tab name',

5832

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',

5833

'info_dict': {

5834

'id': 'UCiu-3thuViMebBjw_5nWYrA',

5835

'tags': [],

5836

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5837

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5838

'description': 'test description',

5839

'title': 'cole-dlp-test-acc - 再生リスト',

5840

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5841

'uploader': 'cole-dlp-test-acc',

5842

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5843

'channel': 'cole-dlp-test-acc',

5844

},

5845

'playlist_mincount': 1,

5846

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5847

'expected_warnings': ['Preferring "ja"'],

5848

}, {

5849

# XXX: this should really check flat playlist entries, but the test suite doesn't support that

5850

'note': 'preferred lang set with playlist with translated video titles',

5851

'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5852

'info_dict': {

5853

'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5854

'tags': [],

5855

'view_count': int,

5856

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5857

'uploader': 'cole-dlp-test-acc',

5858

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5859

'channel': 'cole-dlp-test-acc',

5860

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5861

'description': 'test',

5862

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5863

'title': 'dlp test playlist',

5864

'availability': 'public',

5865

},

5866

'playlist_mincount': 1,

5867

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5868

'expected_warnings': ['Preferring "ja"'],

5869

}, {

5870

# shorts audio pivot for 2GtVksBMYFM.

5871

'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',

5872

'info_dict': {

5873

'id': 'sfv_audio_pivot',

5874

'title': 'sfv_audio_pivot',

5875

'tags': [],

5876

},

5877

'playlist_mincount': 50,

5878

5879

}, {

5880

# Channel with a real live tab (not to be mistaken with streams tab)

5881

# Do not treat like it should redirect to live stream

5882

'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',

5883

'info_dict': {

5884

'id': 'UCEH7P7kyJIkS_gJf93VYbmg',

5885

'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',

5886

'tags': [],

5887

},

5888

'playlist_mincount': 20,

5889

}, {

5890

# Tab name is not the same as tab id

5891

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',

5892

'info_dict': {

5893

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

5894

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',

5895

'tags': [],

5896

},

5897

'playlist_mincount': 8,

5898

}, {

5899

# Home tab id is literally home. Not to get mistaken with featured

5900

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',

5901

'info_dict': {

5902

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

5903

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',

5904

'tags': [],

5905

},

5906

'playlist_mincount': 8,

5907

}, {

5908

# Should get three playlists for videos, shorts and streams tabs

5909

'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5910

'info_dict': {

5911

'id': 'UCK9V2B22uJYu3N7eR_BT9QA',

5912

'title': 'Polka Ch. 尾丸ポルカ',

5913

'channel_follower_count': int,

5914

'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',

5915

'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5916

'uploader': 'Polka Ch. 尾丸ポルカ',

5917

'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9',

5918

'channel': 'Polka Ch. 尾丸ポルカ',

5919

'tags': 'count:35',

5920

'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5921

'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA',

},

'playlist_count': 3,

}, {

# Shorts tab with channel with handle

5926

'url': 'https://www.youtube.com/@NotJustBikes/shorts',

5927

'info_dict': {

5928

'id': 'UC0intLFzLaudFG-xAvUEO-A',

5929

'title': 'Not Just Bikes - Shorts',

5930

'tags': 'count:12',

5931

'uploader': 'Not Just Bikes',

5932

'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',

5933

'description': 'md5:7513148b1f02b924783157d84c4ea555',

5934

'channel_follower_count': int,

5935

'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',

5936

'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',

5937

'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',

5938

'channel': 'Not Just Bikes',

5939

},

5940

'playlist_mincount': 10,

5941

}, {

5942

# Streams tab

5943

'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',

5944

'info_dict': {

5945

'id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5946

'title': '中村悠一 - Live',

5947

'tags': 'count:7',

5948

'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5949

'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',

5950

'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5951

'channel': '中村悠一',

5952

'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',

5953

'channel_follower_count': int,

5954

'uploader': '中村悠一',

5955

'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',

5956

},

5957

'playlist_mincount': 60,

5958

}, {

5959

# Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.

5960

# See test_youtube_lists

5961

'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',

5962

'only_matching': True,

5963

}, {

5964

# No uploads and no UCID given. Should fail with no uploads error

5965

# See test_youtube_lists

5966

'url': 'https://www.youtube.com/news',

5967

'only_matching': True

5968

}, {

5969

# No videos tab but has a shorts tab

5970

'url': 'https://www.youtube.com/c/TKFShorts',

5971

'info_dict': {

5972

'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

5973

'title': 'Shorts Break - Shorts',

5974

'tags': 'count:32',

5975

'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

5976

'channel': 'Shorts Break',

5977

'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',

5978

'uploader': 'Shorts Break',

5979

'channel_follower_count': int,

5980

'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

5981

'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',

5982

'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',

5983

},

5984

'playlist_mincount': 30,

5985

}, {

5986

# Trending Now Tab. tab id is empty

5987

'url': 'https://www.youtube.com/feed/trending',

5988

'info_dict': {

5989

'id': 'trending',

5990

'title': 'trending - Now',

5991

'tags': [],

5992

},

5993

'playlist_mincount': 30,

5994

}, {

5995

# Trending Gaming Tab. tab id is empty

5996

'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',

5997

'info_dict': {

5998

'id': 'trending',

5999

'title': 'trending - Gaming',

6000

'tags': [],

6001

},

6002

'playlist_mincount': 30,

6003

}, {

6004

# Shorts url result in shorts tab

6005

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',

6006

'info_dict': {

6007

'id': 'UCiu-3thuViMebBjw_5nWYrA',

6008

'title': 'cole-dlp-test-acc - Shorts',

6009

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

6010

'channel': 'cole-dlp-test-acc',

6011

'description': 'test description',

6012

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6013

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

6014

'tags': [],

6015

'uploader': 'cole-dlp-test-acc',

6016

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'Youtube',

'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',

6024

'id': 'sSM9J5YH_60',

6025

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6026

'title': 'SHORT short',

6027

'channel': 'cole-dlp-test-acc',

6028

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

'view_count': int,

'thumbnails': list,

}

}],

'params': {'extract_flat': True},

6034

}, {

6035

# Live video status should be extracted

6036

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',

6037

'info_dict': {

6038

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

6039

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live

'tags': []

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'Youtube',

'url': 'startswith:https://www.youtube.com/watch?v=',

6047

'id': str,

6048

'title': str,

6049

'live_status': 'is_live',

6050

'channel_id': str,

6051

'channel_url': str,

6052

'concurrent_view_count': int,

'channel': str,

}

}],

'params': {'extract_flat': True, 'playlist_items': '1'},

6057

'playlist_mincount': 1

6058

}, {

6059

# Channel renderer metadata. Contains number of videos on the channel

6060

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',

6061

'info_dict': {

6062

'id': 'UCiu-3thuViMebBjw_5nWYrA',

6063

'title': 'cole-dlp-test-acc - Channels',

6064

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

6065

'channel': 'cole-dlp-test-acc',

6066

'description': 'test description',

6067

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6068

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

6069

'tags': [],

6070

'uploader': 'cole-dlp-test-acc',

6071

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'YoutubeTab',

6078

'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6079

'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6080

'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6081

'title': 'PewDiePie',

6082

'channel': 'PewDiePie',

6083

'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6084

'thumbnails': list,

6085

'channel_follower_count': int,

6086

'playlist_count': int

6087

}

6088

}],

6089

'params': {'extract_flat': True},

}]

@classmethod
def suitable(cls, url):
    """Decline every URL that YoutubeIE accepts; otherwise defer to the inherited check."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

6095

6096

# Splits a URL accepted by _VALID_URL into three named parts:
#   pre  - the portion matched by _VALID_URL itself
#   tab  - an optional '/<segment>' directly after it; the conditional
#          `(?(not_channel)|...)` only attempts this when the `not_channel`
#          group did not take part in the match
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')

6097

6098

def _get_url_mobj(self, url):

6099

mobj = self._URL_RE.match(url).groupdict()

6100

mobj.update((k, '') for k, v in mobj.items() if v is None)

6101

return mobj

6102

6103

def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Work out an identifier and a display name for a tab renderer.

    Returns a ``(tab_id, tab_name)`` tuple. ``tab_name`` is the lower-cased
    ``title`` of *tab* (``''`` when there is none). ``tab_id`` comes, in
    order of preference, from the tab segment of the tab's endpoint URL
    (resolved against *base_url*), from the ``tabIdentifier`` field, or - as
    a last resort - from the tab name itself.
    """
    name = (tab.get('title') or '').lower()
    endpoint_url = urljoin(
        base_url,
        traverse_obj(tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))

    identifier = None
    if endpoint_url:
        # Slice off the first character of the 'tab' group (its leading slash)
        identifier = self._get_url_mobj(endpoint_url)['tab'][1:]
    if not identifier:
        identifier = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if identifier:
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(identifier, identifier), name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if name:
        self.write_debug(f'Falling back to selected tab name: {name}')
    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(name, name), name

6124

6125

def _has_tab(self, tabs, tab_id):

6126

return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)

6127

6128

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a channel/tab/feed/playlist style URL.

    Depending on what the page turns out to be, this returns one of:
    a ``url_result`` handing over to YoutubeTabIE or YoutubeIE, the result
    of ``_extract_from_tabs`` for a single tab, a ``playlist_result``
    wrapping several tabs, or the result of ``_extract_from_playlist`` for
    an inline playlist.

    Only the 'is_music_url' key of *smuggled_data* is read here.

    Raises ExtractorError when the URL cannot be resolved (no video id, no
    uploads, missing tab, unrecognised page) and UserNotLive when a /live
    tab was requested but a different tab came back.
    """
    item_id = self._match_id(url)
    # Rewrite the host so everything below works against www.youtube.com
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` still carries its leading slash; the requested tab id is the part after it
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # No tab was given: request the /videos page
        url = f'{pre}/videos{post}'

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    # When _yes_playlist() declines, hand the single video over to YoutubeIE
    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Re-attach the originally requested tab and trailing part to the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # extra_tabs collects URLs of further tabs that are extracted in addition to the main one
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    raise ExtractorError('This channel has no uploads', expected=True)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    raise ExtractorError('This channel has no uploads', expected=True)
                else:
                    # From here on the playlist takes the place of the channel
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                # Dropping `data` leaves only the extra tabs to be extracted below
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # Re-read the tab renderers: `data` may have been replaced (or dropped) above
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        metadata = self._extract_metadata_from_tabs(item_id, data)
        # Generic wording, replaced by a concrete URL when a 'UC...' channel id is available
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: fall back to a single video, preferring the id reported by the page
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')

6277

6278

6279

class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and ``list=`` URLs, then delegate to YoutubeTabIE."""

    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/c/WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/c/WickmanVT',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by YoutubeTabIE or carrying a ``v`` value; else use the inherited check."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): imported locally although the module already imports
        # parse_qs; presumably this method has to stay self-contained -
        # confirm before hoisting the import.
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a www.youtube.com/playlist URL and hand it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Has to be decided from the URL as given, before it is rewritten below
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; otherwise build it from the id
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6391

6392

6393

class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list`` parameter."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent youtube.com/watch URL and delegate to YoutubeTabIE."""
        match = self._match_valid_url(url)
        video_id, playlist_id = match.group('id'), match.group('playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6442

6443

6444

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=...`` URLs to the channel's ``/live`` page."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the channel's live page, handled by YoutubeTabIE."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(
            live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

6457

6458

6459

class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to a youtube.com/user URL."""

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the user's page, handled by YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, ie=YoutubeTabIE, video_id=user_id)

6471

6472

6473

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` keyword to the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the ``LL`` playlist, handled by YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

6490

6491

6492

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Extractor for the ``:ytnotif`` keyword (entries of the notification menu)."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in *response*.

        ``continuation_list[0]`` is reset to None and then set to the last
        ``continuationItemRenderer`` encountered, so the caller can request
        the next page.
        """
        menu_items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list)
        continuation_list[0] = None
        for menu_item in menu_items or []:
            result = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if result:
                yield result
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert a notification renderer into a ``url`` result dict.

        Notifications with a watch endpoint point at the video; otherwise a
        community post URL is built from the browse endpoint. Returns None
        when neither a video id nor a channel id + post id pair is available.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            canonical_url = traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', canonical_url, 'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title_pattern = re.escape(channel or "") + r'[^:]+: (.+)'
        title = self._search_regex(
            title_pattern, notification_title, 'video title', default=None)

        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is left."""
        continuation_list = [None]
        response = None
        for page_num in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # visitor data comes from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page_num}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return a playlist result named ``notifications`` with lazily fetched entries."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)

6581

6582

6583

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` search key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` search key."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle youtube.com ``/results`` and ``/search`` URLs, honouring the ``sp`` parameter."""

    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'playlist_count': int,  # XXX: should have a way of saying > 1
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
            },
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results; the query doubles as id and title."""
        qs = parse_qs(url)
        # _VALID_URL requires search_query= or q= in the URL
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

6676

6677

6678

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs, optionally limited to one section."""

    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name -> value of the ``sp`` search parameter selecting it
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of music search results.

        An explicit ``sp`` parameter wins; otherwise the URL fragment is
        looked up in ``_SECTIONS``. The playlist id/title is the query,
        suffixed with `` - <section>`` when a section applies.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Prefer the section's name; fall back to the raw sp value
            for name, value in self._SECTIONS.items():
                if value == params:
                    section = name
                    break
            else:
                section = params
        else:
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

6729

6730

6731

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Base class for feed extractors.

    Subclasses must re-define the _FEED_NAME property; it is used for both
    IE_NAME and the feed URL.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        # Reuse the login check of YoutubeBaseInfoExtractor without inheriting from it
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        return f'youtube:{cls._FEED_NAME}'

    def _real_extract(self, url):
        """Return a url result for ``/feed/<_FEED_NAME>``, handled by YoutubeTabIE."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

6749

6750

6751

class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` keyword to the ``WL`` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the ``WL`` playlist, handled by YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

6763

6764

6765

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``:ytrec`` and the bare youtube.com home page URL."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ytsubs`` keyword."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ythis`` / ``:ythistory`` keyword."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeStoriesIE(InfoExtractor):
    """Resolve ``ytstories:UC...`` to the matching ``RLTD...`` playlist."""

    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the stories playlist, flagged via smuggled data."""
        # The matched id excludes the leading "UC" of the channel id
        playlist_id = f'RLTD{self._match_id(url)}'
        stories_url = smuggle_url(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            {'is_story': True})
        return self.url_result(stories_url, ie=YoutubeTabIE, video_id=playlist_id)

6819

6820

6821

class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Map ``/source/<video id>/shorts`` URLs to the ``sfv_audio_pivot`` feed."""

    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    IE_NAME = 'youtube:shorts:pivot:audio'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Generates sfv_audio_pivot browse params for this video id

        The encoded video id is substituted three times into a fixed byte
        template, which is then base64-encoded and URL-quoted.
        """
        encoded_id = video_id.encode()
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (encoded_id, encoded_id, encoded_id)
        b64_params = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(b64_params)

    def _real_extract(self, url):
        """Return a url result for the audio pivot feed, handled by YoutubeTabIE."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)

class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch/attribution URLs that lack a video id and report them as truncated."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError hinting that the URL was not quoted."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extract youtube.com/clip/ URLs as a section of the underlying video."""

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the clip's base video.

        The result carries the clip id plus ``section_start``/``section_end``
        in seconds, converted from the millisecond values of the clip's
        ``loopCommand``.

        Raises ExtractorError when the video id or the clip start/end times
        cannot be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # clip_data is None when the path is absent; report that (and missing or
        # non-numeric times) as an extraction failure instead of a raw
        # TypeError/KeyError/ValueError
        try:
            section_start = int(clip_data['startTimeMs']) / 1000
            section_end = int(clip_data['endTimeMs']) / 1000
        except (TypeError, KeyError, ValueError) as err:
            raise ExtractorError('Unable to find clip start/end times') from err

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': section_start,
            'section_end': section_end,
        }

class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose ``v`` value is 1-10 characters long and report them as truncated."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)