jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import collections
	4	import copy
	5	import datetime
	6	import enum
	7	import hashlib
	8	import itertools
	9	import json
	10	import math
	11	import os.path
	12	import random
	13	import re
	14	import sys
	15	import threading
	16	import time
	17	import traceback
	18	import urllib.error
	19	import urllib.parse
	20
	21	from .common import InfoExtractor, SearchInfoExtractor
	22	from .openload import PhantomJSwrapper
	23	from ..compat import functools
	24	from ..jsinterp import JSInterpreter
	25	from ..utils import (
	26	NO_DEFAULT,
	27	ExtractorError,
	28	LazyList,
	29	UserNotLive,
	30	bug_reports_message,
	31	classproperty,
	32	clean_html,
	33	datetime_from_str,
	34	dict_get,
	35	filter_dict,
	36	float_or_none,
	37	format_field,
	38	get_first,
	39	int_or_none,
	40	is_html,
	41	join_nonempty,
	42	js_to_json,
	43	mimetype2ext,
	44	network_exceptions,
	45	orderedSet,
	46	parse_codecs,
	47	parse_count,
	48	parse_duration,
	49	parse_iso8601,
	50	parse_qs,
	51	qualities,
	52	remove_start,
	53	smuggle_url,
	54	str_or_none,
	55	str_to_int,
	56	strftime_or_none,
	57	traverse_obj,
	58	try_get,
	59	unescapeHTML,
	60	unified_strdate,
	61	unified_timestamp,
	62	unsmuggle_url,
	63	update_url_query,
	64	url_or_none,
	65	urljoin,
	66	variadic,
	67	)
	68
	69	# any clients starting with _ cannot be explicitly requested by the user
	70	INNERTUBE_CLIENTS = {
	71	'web': {
	72	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	73	'INNERTUBE_CONTEXT': {
	74	'client': {
	75	'clientName': 'WEB',
	76	'clientVersion': '2.20220801.00.00',
	77	}
	78	},
	79	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	80	},
	81	'web_embedded': {
	82	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	83	'INNERTUBE_CONTEXT': {
	84	'client': {
	85	'clientName': 'WEB_EMBEDDED_PLAYER',
	86	'clientVersion': '1.20220731.00.00',
	87	},
	88	},
	89	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	90	},
	91	'web_music': {
	92	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	93	'INNERTUBE_HOST': 'music.youtube.com',
	94	'INNERTUBE_CONTEXT': {
	95	'client': {
	96	'clientName': 'WEB_REMIX',
	97	'clientVersion': '1.20220727.01.00',
	98	}
	99	},
	100	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	101	},
	102	'web_creator': {
	103	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	104	'INNERTUBE_CONTEXT': {
	105	'client': {
	106	'clientName': 'WEB_CREATOR',
	107	'clientVersion': '1.20220726.00.00',
	108	}
	109	},
	110	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	111	},
	112	'android': {
	113	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	114	'INNERTUBE_CONTEXT': {
	115	'client': {
	116	'clientName': 'ANDROID',
	117	'clientVersion': '17.31.35',
	118	'androidSdkVersion': 30,
	119	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	120	}
	121	},
	122	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	123	'REQUIRE_JS_PLAYER': False
	124	},
	125	'android_embedded': {
	126	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	127	'INNERTUBE_CONTEXT': {
	128	'client': {
	129	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	130	'clientVersion': '17.31.35',
	131	'androidSdkVersion': 30,
	132	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	133	},
	134	},
	135	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	136	'REQUIRE_JS_PLAYER': False
	137	},
	138	'android_music': {
	139	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	140	'INNERTUBE_CONTEXT': {
	141	'client': {
	142	'clientName': 'ANDROID_MUSIC',
	143	'clientVersion': '5.16.51',
	144	'androidSdkVersion': 30,
	145	'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
	146	}
	147	},
	148	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	149	'REQUIRE_JS_PLAYER': False
	150	},
	151	'android_creator': {
	152	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	153	'INNERTUBE_CONTEXT': {
	154	'client': {
	155	'clientName': 'ANDROID_CREATOR',
	156	'clientVersion': '22.30.100',
	157	'androidSdkVersion': 30,
	158	'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
	159	},
	160	},
	161	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	162	'REQUIRE_JS_PLAYER': False
	163	},
	164	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	165	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	166	'ios': {
	167	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	168	'INNERTUBE_CONTEXT': {
	169	'client': {
	170	'clientName': 'IOS',
	171	'clientVersion': '17.33.2',
	172	'deviceModel': 'iPhone14,3',
	173	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	174	}
	175	},
	176	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	177	'REQUIRE_JS_PLAYER': False
	178	},
	179	'ios_embedded': {
	180	'INNERTUBE_CONTEXT': {
	181	'client': {
	182	'clientName': 'IOS_MESSAGES_EXTENSION',
	183	'clientVersion': '17.33.2',
	184	'deviceModel': 'iPhone14,3',
	185	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	186	},
	187	},
	188	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	189	'REQUIRE_JS_PLAYER': False
	190	},
	191	'ios_music': {
	192	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	193	'INNERTUBE_CONTEXT': {
	194	'client': {
	195	'clientName': 'IOS_MUSIC',
	196	'clientVersion': '5.21',
	197	'deviceModel': 'iPhone14,3',
	198	'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	199	},
	200	},
	201	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	202	'REQUIRE_JS_PLAYER': False
	203	},
	204	'ios_creator': {
	205	'INNERTUBE_CONTEXT': {
	206	'client': {
	207	'clientName': 'IOS_CREATOR',
	208	'clientVersion': '22.33.101',
	209	'deviceModel': 'iPhone14,3',
	210	'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	211	},
	212	},
	213	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	214	'REQUIRE_JS_PLAYER': False
	215	},
	216	# mweb has 'ultralow' formats
	217	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	218	'mweb': {
	219	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	220	'INNERTUBE_CONTEXT': {
	221	'client': {
	222	'clientName': 'MWEB',
	223	'clientVersion': '2.20220801.00.00',
	224	}
	225	},
	226	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	227	},
	228	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	229	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	230	'tv_embedded': {
	231	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	232	'INNERTUBE_CONTEXT': {
	233	'client': {
	234	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	235	'clientVersion': '2.0',
	236	},
	237	},
	238	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	239	},
	240	}
	241
	242
	243	def _split_innertube_client(client_name):
	244	variant, *base = client_name.rsplit('.', 1)
	245	if base:
	246	return variant, base[0], variant
	247	base, *variant = client_name.split('_', 1)
	248	return client_name, base, variant[0] if variant else None
	249
	250
	251	def build_innertube_clients():
	252	THIRD_PARTY = {
	253	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	254	}
	255	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	256	priority = qualities(BASE_CLIENTS[::-1])
	257
	258	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	259	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	260	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	261	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	262	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	263
	264	_, base_client, variant = _split_innertube_client(client)
	265	ytcfg['priority'] = 10 * priority(base_client)
	266
	267	if not variant:
	268	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	269	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	270	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	271	embedscreen['priority'] -= 3
	272	elif variant == 'embedded':
	273	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	274	ytcfg['priority'] -= 2
	275	else:
	276	ytcfg['priority'] -= 3
	277
	278
	279	build_innertube_clients()
	280
	281
	282	class BadgeType(enum.Enum):
	283	AVAILABILITY_UNLISTED = enum.auto()
	284	AVAILABILITY_PRIVATE = enum.auto()
	285	AVAILABILITY_PUBLIC = enum.auto()
	286	AVAILABILITY_PREMIUM = enum.auto()
	287	AVAILABILITY_SUBSCRIPTION = enum.auto()
	288	LIVE_NOW = enum.auto()
	289
	290
	291	class YoutubeBaseInfoExtractor(InfoExtractor):
	292	"""Provide base functions for Youtube extractors"""
	293
	294	_RESERVED_NAMES = (
	295	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	296	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	297	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|source\|'
	298	r'storefront\|oops\|index\|account\|t/terms\|about\|upload\|signin\|logout')
	299
	300	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	301
	302	# _NETRC_MACHINE = 'youtube'
	303
	304	# If True it will raise an error if no login info is provided
	305	_LOGIN_REQUIRED = False
	306
	307	_INVIDIOUS_SITES = (
	308	# invidious-redirect websites
	309	r'(?:www\.)?redirect\.invidious\.io',
	310	r'(?:(?:www\|dev)\.)?invidio\.us',
	311	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	312	r'(?:www\.)?invidious\.pussthecat\.org',
	313	r'(?:www\.)?invidious\.zee\.li',
	314	r'(?:www\.)?invidious\.ethibox\.fr',
	315	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	316	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	317	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	318	# youtube-dl invidious instances list
	319	r'(?:(?:www\|no)\.)?invidiou\.sh',
	320	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	321	r'(?:www\.)?invidious\.kabi\.tk',
	322	r'(?:www\.)?invidious\.mastodon\.host',
	323	r'(?:www\.)?invidious\.zapashcanon\.fr',
	324	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	325	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	326	r'(?:www\.)?invidious\.himiko\.cloud',
	327	r'(?:www\.)?invidious\.reallyancient\.tech',
	328	r'(?:www\.)?invidious\.tube',
	329	r'(?:www\.)?invidiou\.site',
	330	r'(?:www\.)?invidious\.site',
	331	r'(?:www\.)?invidious\.xyz',
	332	r'(?:www\.)?invidious\.nixnet\.xyz',
	333	r'(?:www\.)?invidious\.048596\.xyz',
	334	r'(?:www\.)?invidious\.drycat\.fr',
	335	r'(?:www\.)?inv\.skyn3t\.in',
	336	r'(?:www\.)?tube\.poal\.co',
	337	r'(?:www\.)?tube\.connect\.cafe',
	338	r'(?:www\.)?vid\.wxzm\.sx',
	339	r'(?:www\.)?vid\.mint\.lgbt',
	340	r'(?:www\.)?vid\.puffyan\.us',
	341	r'(?:www\.)?yewtu\.be',
	342	r'(?:www\.)?yt\.elukerio\.org',
	343	r'(?:www\.)?yt\.lelux\.fi',
	344	r'(?:www\.)?invidious\.ggc-project\.de',
	345	r'(?:www\.)?yt\.maisputain\.ovh',
	346	r'(?:www\.)?ytprivate\.com',
	347	r'(?:www\.)?invidious\.13ad\.de',
	348	r'(?:www\.)?invidious\.toot\.koeln',
	349	r'(?:www\.)?invidious\.fdn\.fr',
	350	r'(?:www\.)?watch\.nettohikari\.com',
	351	r'(?:www\.)?invidious\.namazso\.eu',
	352	r'(?:www\.)?invidious\.silkky\.cloud',
	353	r'(?:www\.)?invidious\.exonip\.de',
	354	r'(?:www\.)?invidious\.riverside\.rocks',
	355	r'(?:www\.)?invidious\.blamefran\.net',
	356	r'(?:www\.)?invidious\.moomoo\.de',
	357	r'(?:www\.)?ytb\.trom\.tf',
	358	r'(?:www\.)?yt\.cyberhost\.uk',
	359	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	360	r'(?:www\.)?qklhadlycap4cnod\.onion',
	361	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	362	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	363	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	364	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	365	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	366	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	367	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	368	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	369	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	370	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	371	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	372	r'(?:www\.)?piped\.kavin\.rocks',
	373	r'(?:www\.)?piped\.tokhmi\.xyz',
	374	r'(?:www\.)?piped\.syncpundit\.io',
	375	r'(?:www\.)?piped\.mha\.fi',
	376	r'(?:www\.)?watch\.whatever\.social',
	377	r'(?:www\.)?piped\.garudalinux\.org',
	378	r'(?:www\.)?piped\.rivo\.lol',
	379	r'(?:www\.)?piped-libre\.kavin\.rocks',
	380	r'(?:www\.)?yt\.jae\.fi',
	381	r'(?:www\.)?piped\.mint\.lgbt',
	382	r'(?:www\.)?il\.ax',
	383	r'(?:www\.)?piped\.esmailelbob\.xyz',
	384	r'(?:www\.)?piped\.projectsegfau\.lt',
	385	r'(?:www\.)?piped\.privacydev\.net',
	386	r'(?:www\.)?piped\.palveluntarjoaja\.eu',
	387	r'(?:www\.)?piped\.smnz\.de',
	388	r'(?:www\.)?piped\.adminforge\.de',
	389	r'(?:www\.)?watch\.whatevertinfoil\.de',
	390	r'(?:www\.)?piped\.qdi\.fi',
	391	r'(?:www\.)?piped\.video',
	392	r'(?:www\.)?piped\.aeong\.one',
	393	)
	394
	395	# extracted from account/account_menu ep
	396	# XXX: These are the supported YouTube UI and API languages,
	397	# which is slightly different from languages supported for translation in YouTube studio
	398	_SUPPORTED_LANG_CODES = [
	399	'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
	400	'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
	401	'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
	402	'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
	403	'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
	404	'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
	405	]
	406
	407	_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
	408
	409	@functools.cached_property
	410	def _preferred_lang(self):
	411	"""
	412	Returns a language code supported by YouTube for the user preferred language.
	413	Returns None if no preferred language set.
	414	"""
	415	preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
	416	if not preferred_lang:
	417	return
	418	if preferred_lang not in self._SUPPORTED_LANG_CODES:
	419	raise ExtractorError(
	420	f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
	421	expected=True)
	422	elif preferred_lang != 'en':
	423	self.report_warning(
	424	f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
	425	return preferred_lang
	426
	427	def _initialize_consent(self):
	428	cookies = self._get_cookies('https://www.youtube.com/')
	429	if cookies.get('__Secure-3PSID'):
	430	return
	431	consent_id = None
	432	consent = cookies.get('CONSENT')
	433	if consent:
	434	if 'YES' in consent.value:
	435	return
	436	consent_id = self._search_regex(
	437	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	438	if not consent_id:
	439	consent_id = random.randint(100, 999)
	440	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	441
	442	def _initialize_pref(self):
	443	cookies = self._get_cookies('https://www.youtube.com/')
	444	pref_cookie = cookies.get('PREF')
	445	pref = {}
	446	if pref_cookie:
	447	try:
	448	pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
	449	except ValueError:
	450	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	451	pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
	452	self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
	453
	454	def _real_initialize(self):
	455	self._initialize_pref()
	456	self._initialize_consent()
	457	self._check_login_required()
	458
	459	def _check_login_required(self):
	460	if self._LOGIN_REQUIRED and not self._cookies_passed:
	461	self.raise_login_required('Login details are needed to download this content', method='cookies')
	462
	463	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	464	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	465
	466	def _get_default_ytcfg(self, client='web'):
	467	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	468
	469	def _get_innertube_host(self, client='web'):
	470	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	471
	472	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	473	# try_get but with fallback to default ytcfg client values when present
	474	_func = lambda y: try_get(y, getter, expected_type)
	475	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	476
	477	def _extract_client_name(self, ytcfg, default_client='web'):
	478	return self._ytcfg_get_safe(
	479	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	480	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
	481
	482	def _extract_client_version(self, ytcfg, default_client='web'):
	483	return self._ytcfg_get_safe(
	484	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	485	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
	486
	487	def _select_api_hostname(self, req_api_hostname, default_client=None):
	488	return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
	489	or req_api_hostname or self._get_innertube_host(default_client or 'web'))
	490
	491	def _extract_api_key(self, ytcfg=None, default_client='web'):
	492	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
	493
	494	def _extract_context(self, ytcfg=None, default_client='web'):
	495	context = get_first(
	496	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	497	# Enforce language and tz for extraction
	498	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	499	client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	500	return context

1

import base64

import calendar

import collections

import copy

import datetime

import enum

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

22

from .openload import PhantomJSwrapper

23

from ..compat import functools

24

from ..jsinterp import JSInterpreter

25

from ..utils import (

NO_DEFAULT,

ExtractorError,

LazyList,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

filter_dict,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

70

INNERTUBE_CLIENTS = {

71

'web': {

72

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

73

'INNERTUBE_CONTEXT': {

74

'client': {

75

'clientName': 'WEB',

76

'clientVersion': '2.20220801.00.00',

77

}

78

},

79

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

80

},

81

'web_embedded': {

82

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

83

'INNERTUBE_CONTEXT': {

84

'client': {

85

'clientName': 'WEB_EMBEDDED_PLAYER',

86

'clientVersion': '1.20220731.00.00',

87

},

88

},

89

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

90

},

91

'web_music': {

92

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

93

'INNERTUBE_HOST': 'music.youtube.com',

94

'INNERTUBE_CONTEXT': {

95

'client': {

96

'clientName': 'WEB_REMIX',

97

'clientVersion': '1.20220727.01.00',

98

}

99

},

100

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

101

},

102

'web_creator': {

103

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

104

'INNERTUBE_CONTEXT': {

105

'client': {

106

'clientName': 'WEB_CREATOR',

107

'clientVersion': '1.20220726.00.00',

108

}

109

},

110

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

111

},

112

'android': {

113

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

114

'INNERTUBE_CONTEXT': {

115

'client': {

116

'clientName': 'ANDROID',

117

'clientVersion': '17.31.35',

118

'androidSdkVersion': 30,

119

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

120

}

121

},

122

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

123

'REQUIRE_JS_PLAYER': False

124

},

125

'android_embedded': {

126

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

127

'INNERTUBE_CONTEXT': {

128

'client': {

129

'clientName': 'ANDROID_EMBEDDED_PLAYER',

130

'clientVersion': '17.31.35',

131

'androidSdkVersion': 30,

132

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

133

},

134

},

135

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

136

'REQUIRE_JS_PLAYER': False

137

},

138

'android_music': {

139

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

140

'INNERTUBE_CONTEXT': {

141

'client': {

142

'clientName': 'ANDROID_MUSIC',

143

'clientVersion': '5.16.51',

144

'androidSdkVersion': 30,

145

'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'

146

}

147

},

148

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

149

'REQUIRE_JS_PLAYER': False

150

},

151

'android_creator': {

152

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

153

'INNERTUBE_CONTEXT': {

154

'client': {

155

'clientName': 'ANDROID_CREATOR',

156

'clientVersion': '22.30.100',

157

'androidSdkVersion': 30,

158

'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'

159

},

160

},

161

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

162

'REQUIRE_JS_PLAYER': False

163

},

164

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

165

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

166

'ios': {

167

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

168

'INNERTUBE_CONTEXT': {

169

'client': {

170

'clientName': 'IOS',

171

'clientVersion': '17.33.2',

172

'deviceModel': 'iPhone14,3',

173

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

174

}

175

},

176

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

177

'REQUIRE_JS_PLAYER': False

178

},

179

'ios_embedded': {

180

'INNERTUBE_CONTEXT': {

181

'client': {

182

'clientName': 'IOS_MESSAGES_EXTENSION',

183

'clientVersion': '17.33.2',

184

'deviceModel': 'iPhone14,3',

185

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

186

},

187

},

188

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

189

'REQUIRE_JS_PLAYER': False

190

},

191

'ios_music': {

192

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

193

'INNERTUBE_CONTEXT': {

194

'client': {

195

'clientName': 'IOS_MUSIC',

196

'clientVersion': '5.21',

197

'deviceModel': 'iPhone14,3',

198

'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

199

},

200

},

201

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

202

'REQUIRE_JS_PLAYER': False

203

},

204

'ios_creator': {

205

'INNERTUBE_CONTEXT': {

206

'client': {

207

'clientName': 'IOS_CREATOR',

208

'clientVersion': '22.33.101',

209

'deviceModel': 'iPhone14,3',

210

'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

211

},

212

},

213

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

214

'REQUIRE_JS_PLAYER': False

215

},

216

# mweb has 'ultralow' formats

217

# See: https://github.com/yt-dlp/yt-dlp/pull/557

218

'mweb': {

219

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

220

'INNERTUBE_CONTEXT': {

221

'client': {

222

'clientName': 'MWEB',

223

'clientVersion': '2.20220801.00.00',

224

}

225

},

226

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

227

},

228

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

229

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

230

'tv_embedded': {

231

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

232

'INNERTUBE_CONTEXT': {

233

'client': {

234

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

235

'clientVersion': '2.0',

236

},

237

},

238

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):
    """Split a client name into a ``(name, base, variant)`` tuple.

    A name containing ``.`` is split at the last dot into ``left.right`` and
    yields ``(left, right, left)``.  Any other name is split at the first
    ``_`` and yields ``(client_name, base, variant)``; ``variant`` is ``None``
    when the name has no underscore.
    """
    variant, *base = client_name.rsplit('.', 1)
    if base:
        return variant, base[0], variant
    base, *variant = client_name.split('_', 1)
    return client_name, base, variant[0] if variant else None

249

250

251

def build_innertube_clients():
    """Complete every entry of ``INNERTUBE_CLIENTS`` in place.

    For each client present when the function is called:

    * missing ``INNERTUBE_API_KEY``, ``INNERTUBE_HOST``, ``REQUIRE_JS_PLAYER``
      and context ``hl`` values are filled with defaults;
    * ``priority`` is set to ``10 * priority(base_client)`` and then lowered
      by 2 for ``embedded`` variants or by 3 for any other variant;
    * a client without a variant additionally gets a ``<client>_embedscreen``
      deep copy whose context has ``clientScreen`` set to ``'EMBED'`` and a
      ``thirdParty`` entry, with its priority lowered by 3.
    """
    THIRD_PARTY = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
    # NOTE(review): presumably `qualities` ranks later items higher, so the
    # reversal makes 'android' the most preferred base - confirm in utils.
    priority = qualities(BASE_CLIENTS[::-1])

    # Iterate over a snapshot: '*_embedscreen' entries are inserted during the loop
    for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
        ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(client)
        ytcfg['priority'] = 10 * priority(base_client)

        if not variant:
            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            # Each config gets its own dict so editing one cannot leak into the others
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = dict(THIRD_PARTY)
            embedscreen['priority'] -= 3
        elif variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = dict(THIRD_PARTY)
            ytcfg['priority'] -= 2
        else:
            ytcfg['priority'] -= 3

277

278

279

build_innertube_clients()

280

281

282

class BadgeType(enum.Enum):
    """Badge kinds: five availability states plus a live-now marker.

    Member values are assigned by ``enum.auto()`` in declaration order.
    """
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()
    LIVE_NOW = enum.auto()

289

290

291

class YoutubeBaseInfoExtractor(InfoExtractor):

292

"""Provide base functions for Youtube extractors"""

293

294

_RESERVED_NAMES = (

295

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

301

302

# _NETRC_MACHINE = 'youtube'

303

304

# If True it will raise an error if no login info is provided

305

_LOGIN_REQUIRED = False

306

307

_INVIDIOUS_SITES = (

308

# invidious-redirect websites

309

r'(?:www\.)?redirect\.invidious\.io',

310

r'(?:(?:www|dev)\.)?invidio\.us',

311

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

312

r'(?:www\.)?invidious\.pussthecat\.org',

313

r'(?:www\.)?invidious\.zee\.li',

314

r'(?:www\.)?invidious\.ethibox\.fr',

315

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

316

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

317

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

318

# youtube-dl invidious instances list

319

r'(?:(?:www|no)\.)?invidiou\.sh',

320

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

321

r'(?:www\.)?invidious\.kabi\.tk',

322

r'(?:www\.)?invidious\.mastodon\.host',

323

r'(?:www\.)?invidious\.zapashcanon\.fr',

324

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

325

r'(?:www\.)?invidious\.tinfoil-hat\.net',

326

r'(?:www\.)?invidious\.himiko\.cloud',

327

r'(?:www\.)?invidious\.reallyancient\.tech',

328

r'(?:www\.)?invidious\.tube',

329

r'(?:www\.)?invidiou\.site',

330

r'(?:www\.)?invidious\.site',

331

r'(?:www\.)?invidious\.xyz',

332

r'(?:www\.)?invidious\.nixnet\.xyz',

333

r'(?:www\.)?invidious\.048596\.xyz',

334

r'(?:www\.)?invidious\.drycat\.fr',

335

r'(?:www\.)?inv\.skyn3t\.in',

336

r'(?:www\.)?tube\.poal\.co',

337

r'(?:www\.)?tube\.connect\.cafe',

338

r'(?:www\.)?vid\.wxzm\.sx',

339

r'(?:www\.)?vid\.mint\.lgbt',

340

r'(?:www\.)?vid\.puffyan\.us',

341

r'(?:www\.)?yewtu\.be',

342

r'(?:www\.)?yt\.elukerio\.org',

343

r'(?:www\.)?yt\.lelux\.fi',

344

r'(?:www\.)?invidious\.ggc-project\.de',

345

r'(?:www\.)?yt\.maisputain\.ovh',

346

r'(?:www\.)?ytprivate\.com',

347

r'(?:www\.)?invidious\.13ad\.de',

348

r'(?:www\.)?invidious\.toot\.koeln',

349

r'(?:www\.)?invidious\.fdn\.fr',

350

r'(?:www\.)?watch\.nettohikari\.com',

351

r'(?:www\.)?invidious\.namazso\.eu',

352

r'(?:www\.)?invidious\.silkky\.cloud',

353

r'(?:www\.)?invidious\.exonip\.de',

354

r'(?:www\.)?invidious\.riverside\.rocks',

355

r'(?:www\.)?invidious\.blamefran\.net',

356

r'(?:www\.)?invidious\.moomoo\.de',

357

r'(?:www\.)?ytb\.trom\.tf',

358

r'(?:www\.)?yt\.cyberhost\.uk',

359

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

360

r'(?:www\.)?qklhadlycap4cnod\.onion',

361

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

362

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

363

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

364

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

365

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

366

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

367

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

368

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

369

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

370

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

371

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

372

r'(?:www\.)?piped\.kavin\.rocks',

373

r'(?:www\.)?piped\.tokhmi\.xyz',

374

r'(?:www\.)?piped\.syncpundit\.io',

375

r'(?:www\.)?piped\.mha\.fi',

376

r'(?:www\.)?watch\.whatever\.social',

377

r'(?:www\.)?piped\.garudalinux\.org',

378

r'(?:www\.)?piped\.rivo\.lol',

379

r'(?:www\.)?piped-libre\.kavin\.rocks',

380

r'(?:www\.)?yt\.jae\.fi',

381

r'(?:www\.)?piped\.mint\.lgbt',

382

r'(?:www\.)?il\.ax',

383

r'(?:www\.)?piped\.esmailelbob\.xyz',

384

r'(?:www\.)?piped\.projectsegfau\.lt',

385

r'(?:www\.)?piped\.privacydev\.net',

386

r'(?:www\.)?piped\.palveluntarjoaja\.eu',

387

r'(?:www\.)?piped\.smnz\.de',

388

r'(?:www\.)?piped\.adminforge\.de',

389

r'(?:www\.)?watch\.whatevertinfoil\.de',

390

r'(?:www\.)?piped\.qdi\.fi',

391

r'(?:www\.)?piped\.video',

392

r'(?:www\.)?piped\.aeong\.one',

393

)

394

395

# extracted from account/account_menu ep

396

# XXX: These are the supported YouTube UI and API languages,

397

# which is slightly different from languages supported for translation in YouTube studio

398

_SUPPORTED_LANG_CODES = [

399

'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',

400

'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',

401

'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',

402

'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',

403

'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',

404

'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'

405

]

406

407

_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}

408

409

@functools.cached_property
def _preferred_lang(self):
    """
    Language code requested through the Youtube "lang" extractor argument.

    Returns None when no language was requested.
    Raises ExtractorError (expected) when the requested code is not listed
    in _SUPPORTED_LANG_CODES; the comparison is case-sensitive.
    """
    preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not preferred_lang:
        return None

    if preferred_lang not in self._SUPPORTED_LANG_CODES:
        raise ExtractorError(
            f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
            expected=True)

    # Any supported language other than English gets a one-off heads-up
    if preferred_lang != 'en':
        self.report_warning(
            f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return preferred_lang

426

427

def _initialize_consent(self):
    """
    Set an accepting CONSENT cookie for .youtube.com.

    Nothing is changed when a __Secure-3PSID cookie exists or when the
    current CONSENT cookie already contains 'YES'.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    consent_cookie = jar.get('CONSENT')
    pending_id = None
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        # Reuse the numeric id of a pending consent when one is present
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    # Otherwise fall back to a random three-digit id
    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', f'YES+cb.20210328-17-p0.en+FX+{consent_id}')

441

442

def _initialize_pref(self):
    """
    Rewrite the PREF cookie so that 'hl' is the preferred language
    (default 'en') and 'tz' is 'UTC', keeping any other existing values.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')

    settings = {}
    if pref_cookie:
        try:
            settings = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            # An unparsable cookie is discarded; only the enforced keys are written back
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    settings['hl'] = self._preferred_lang or 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))

453

454

def _real_initialize(self):
    """Run the initialization steps in order: PREF cookie, consent cookie, login check."""
    setup_steps = (
        self._initialize_pref,
        self._initialize_consent,
        self._check_login_required,
    )
    for step in setup_steps:
        step()

458

459

def _check_login_required(self):
    """Call raise_login_required() when the extractor needs a login and no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')

462

463

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

464

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

465

466

def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in INNERTUBE_CLIENTS entry for `client`."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    # Copy so that callers can mutate the result without touching the shared table
    return copy.deepcopy(builtin_cfg)

468

469

def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the built-in INNERTUBE_CLIENTS entry for `client`."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']

471

472

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get() on `ytcfg`, falling back to the built-in config of
    `default_client` when the first lookup yields a falsy value.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

476

477

def _extract_client_name(self, ytcfg, default_client='web'):
    """
    Client name from `ytcfg`: 'INNERTUBE_CLIENT_NAME' first, then the
    context's client.clientName, else the same lookups on the default config.
    """
    name_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, str, default_client)

481

482

def _extract_client_version(self, ytcfg, default_client='web'):
    """
    Client version from `ytcfg`: 'INNERTUBE_CLIENT_VERSION' first, then the
    context's client.clientVersion, else the same lookups on the default config.
    """
    version_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, str, default_client)

486

487

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname. Precedence: the 'innertube_host' extractor
    argument, then `req_api_hostname`, then the built-in host of
    `default_client` ('web' when not given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

490

491

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, else from the default config of `default_client`."""
    def api_key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_getter, str, default_client)

493

494

def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the first dict-valued 'INNERTUBE_CONTEXT' found in `ytcfg` or in
    the default config, with language and timezone enforced on its client part.

    NOTE: a context taken from `ytcfg` is modified in place.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context['hl'] = self._preferred_lang or 'en'
    client_context['timeZone'] = 'UTC'
    client_context['utcOffsetMinutes'] = 0
    return context

# Cached SAPISID value: None = not looked up yet, False = no usable cookie found
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for `origin`.

    Returns None when neither a __Secure-3PAPISID nor a SAPISID cookie
    with a value is available.
    """
    now = round(time.time())

    if self._SAPISID is None:
        jar = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
        if not (cookie and cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not jar.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=now + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{now} {self._SAPISID} {origin}'.encode()
    digest = hashlib.sha1(payload).hexdigest()
    return f'SAPISIDHASH {now}_{digest}'

528

529

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged over the innertube context) as JSON to the
    /youtubei/v1/<ep> endpoint and return the result of _download_json().

    The API key is taken from the 'innertube_key' extractor argument, then
    `api_key`, then the default config of `default_client`.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        # Caller-supplied headers override the generated ones
        request_headers.update(headers)

    key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
           or api_key or self._extract_api_key(default_client=default_client))
    host = self._select_api_hostname(api_hostname, default_client)

    return self._download_json(
        f'https://{host}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': key, 'prettyPrint': 'false'})

546

547

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON that follows the ytInitialData assignment."""
    start_pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(start_pattern, webpage, 'yt initial data', item_id, fatal=fatal)

549

550

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts
    to an int, or None when there is none.
    """
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in candidates if index is not None), None)

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """
    Return the 'ID_TOKEN' string from `ytcfg`, else the ID_TOKEN value
    found in `webpage` by regex, else None.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

571

572

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(source, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        if len(parts) < 2 or not parts[1]:
            continue
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return parts[0]
    return None

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value, tried as branches of a single path
    visitor_data_keys = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_keys], expected_type=str)

600

601

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

604

605

def extract_ytcfg(self, video_id, webpage):
    """
    Parse the JSON object passed to `ytcfg.set({...});` in `webpage`.

    Returns {} when `webpage` is empty, when no such call is found, or when
    the captured text cannot be parsed as JSON.
    """
    if not webpage:
        return {}
    # The parentheses of the call must be matched literally. With `$` in
    # their place the pattern could never match, because `$` only matches at
    # the end of the string and cannot be followed by `{`.
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_cfg, video_id, fatal=False) or {}

612

613

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an innertube API request.

    Explicit keyword arguments take precedence over values found in `ytcfg`.
    Entries rejected by filter_dict() are removed from the result.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Account index 0 is used when only a sync id is known
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return filter_dict(headers)

638

639

def _download_ytcfg(self, client, video_id):
    """
    Download the page for `client` and return the ytcfg extracted from it.

    Returns {} for clients without a known page URL or when nothing could
    be extracted.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
    }
    url = config_pages.get(client)
    if not url:
        return {}

    client_label = client.replace("_", " ").strip()
    webpage = self._download_webpage(
        url, video_id, fatal=False, note=f'Downloading {client_label} client config')
    return self.extract_ytcfg(video_id, webpage) or {}

650

651

@staticmethod

652

def _build_api_continuation_query(continuation, ctp=None):

653

query = {

654

'continuation': continuation

655

}

656

# TODO: Inconsistency with clickTrackingParams.

657

# Currently we have a fixed ctp contained within context (from ytcfg)

658

# and a ctp in root query for continuation.

659

if ctp:

660

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData or
    reloadContinuationData. Returns None when no continuation token is found.
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

675

676

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or has no string
    continuationCommand.token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

685

686

@classmethod
def _extract_continuation(cls, renderer):
    """
    Return a continuation query for `renderer`: the next/reload continuation
    data when present, otherwise the first continuationItemRenderer endpoint
    that yields a query.
    """
    continuation_item_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
    )
    return (
        cls._extract_next_continuation_data(renderer)
        or traverse_obj(
            renderer, continuation_item_path,
            get_all=False, expected_type=cls._extract_continuation_ep_data))

696

697

@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every entry of data['alerts'] that has
    both a type and a non-empty text.
    """
    alert_entries = try_get(data, lambda x: x['alerts'], list) or []
    for entry in alert_entries:
        if not isinstance(entry, dict):
            continue
        for alert in entry.values():
            alert_type = alert.get('type')
            # The text is only looked up for alerts that carry a type
            message = cls._get_text(alert, 'text') if alert_type else None
            if message:
                yield alert_type, message

709

710

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (alert_type, message) pairs.

    With `fatal`, the last alert of type 'error' (case-insensitive) is raised
    as ExtractorError and earlier ones are reported as warnings. Other alerts
    are reported as warnings unless their message is in _IGNORED_WARNINGS.
    """
    errors, warnings = [], []
    for kind, message in alerts:
        if kind.lower() == 'error' and fatal:
            errors.append((kind, message))
        elif message not in self._IGNORED_WARNINGS:
            warnings.append((kind, message))

    # Every error except the last is demoted to a warning
    for kind, message in [*warnings, *errors[:-1]]:
        self.report_warning(f'YouTube said: {kind} - {message}', only_once=only_once)

    if not errors:
        return
    raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

722

723

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and pass them, with any extra arguments, to _report_alerts()."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

725

726

def _extract_badges(self, renderer: dict):
    """
    Return a list of {'type': BadgeType} dicts for the metadata badges of `renderer`.

    Each badge is classified by its icon type, then by its style, and as a
    last resort by matching known English words in its label. Badges that
    cannot be classified are skipped.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Record the type matched by the label (badge_type is always
                # falsy on this path) and stop at the first match so that a
                # badge contributes at most one entry.
                badges.append({'type': label_badge_type})
                break

    return badges

@staticmethod
def _has_badge(badges, badge_type):
    """Return True when any entry of `badges` has its 'type' equal to `badge_type`."""
    def is_wanted(_, badge):
        return badge['type'] == badge_type

    matches = traverse_obj(badges, is_wanted)
    return bool(matches)

769

770

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data` at any of the given paths.

    An object's text is its 'simpleText' or, failing that, the concatenation
    of the 'text' values of its 'runs' (limited to the first `max_runs` runs
    when given). Without paths, `data` itself is examined.
    Returns None when no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # A path without `...` or nested list/tuple keys is handled as
            # yielding one object, so wrap it to share the loop below
            branching = any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                # The item itself may already be the list of runs
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None

def _get_count(self, data, *path_list):
    """
    Parse a count from the text found via _get_text().

    parse_count() is tried first; when it returns None, the leading
    digits/commas of the text (whitespace removed) are converted with str_to_int().
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count

    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(
        self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            thumb_url = url_or_none(entry.get('url'))
            if not thumb_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumb_url:
                # Keep only the part before the query string
                thumb_url = thumb_url.partition('?')[0]
            results.append({
                'url': thumb_url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results

@staticmethod

def extract_relative_time(relative_time_text):

826

"""

827

Extracts a relative time from string and converts to dt object

828

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

"""

if mobj:

start = mobj.group('start')

833

if start:

834

return datetime_from_str(start)

835

try:

836

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _parse_time_text(self, text):
    """
    Convert a time text into a timestamp, or None when it cannot be parsed.

    A relative time ('6 days ago') is tried first, then unified_timestamp()
    on the whole text, then on a date-like part extracted by regex. A warning
    is issued on failure only when the preferred language is unset or 'en'.
    """
    if not text:
        return None

    timestamp = None
    relative = self.extract_relative_time(text)
    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_text = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_text)

    if text and timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp

859

860

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` and return its response, retrying through
    self.RetryManager() on problems that look transient.

    A retry is requested for non-HTTP network errors, HTTP errors other than
    403/429, alerts containing 'unknown error', and responses in which
    `check_get_keys` resolves to nothing. Any other failure is passed to
    _error_or_warning() with `fatal`.
    """
    for retry in self.RetryManager():
        try:
            # Context and API key are resolved inside the try block so that
            # their errors go through the same handling as the request itself
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            cause = err.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(cause, urllib.error.HTTPError):
                retry.error = err
                continue

            head = cause.read(512)
            if not is_html(head):
                # A non-HTML error body may be JSON carrying a message from YouTube
                body = self._webpage_read_content(cause, None, item_id, prefix=head) or '{}'
                api_message = try_get(
                    self._parse_json(body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if api_message:
                    self._report_alerts([('ERROR', api_message)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.code in (403, 429):
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' not in err.msg.lower():
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response

@staticmethod

def is_music_url(url):

914

return re.match(r'https?://music\.youtube\.com/', url) is not None

915

916

def _extract_video(self, renderer):
    """
    Build a 'url'-type result dict for a video renderer.

    Fields missing from the renderer are filled from its reel (shorts)
    player header where one is present. The view count is stored under
    'concurrent_view_count' for live and upcoming videos and under
    'view_count' otherwise.
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline')
    if not title:
        title = self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not channel_id:
        channel_id = traverse_obj(reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    ie_key = YoutubeIE.ie_key()
    channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
               or self._get_text(reel_header, 'channelTitleText'))
    channel_url = f'https://www.youtube.com/channel/{channel_id}' if channel_id else None
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # The timestamp is only computed when the 'approximate_date' argument is set
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)
    else:
        timestamp = None

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        # A missing badge is passed on as None rather than False
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': ie_key,
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': channel_url,
        'thumbnails': thumbnails,
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_field: view_count,
        'live_status': live_status,
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

1001

# Human-readable description of this extractor.
IE_DESC = 'YouTube'
# Verbose-mode (?x) pattern for single-video URLs.
# Group 1 (optional) is everything that may precede the ID: scheme, one of the
# known hostnames (including the Invidious mirrors substituted in through
# %(invidious)s) and the path/query prefix. The named group `id` is the
# 11-character video ID; because group 1 is optional, a naked ID also matches.
# NOTE: scrape artifacts (blame line numbers) had been injected between the
# pattern lines; inside the string they are literal digits, so they are removed.
_VALID_URL = r"""(?x)^
    (
        (?:https?://|//)  # http(s):// or protocol-independent URL
        (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
            (?:www\.)?deturl\.com/www\.youtube\.com|
            (?:www\.)?pwnyoutube\.com|
            (?:www\.)?hooktube\.com|
            (?:www\.)?yourepeat\.com|
            tube\.majestyc\.net|
            %(invidious)s|
            youtube\.googleapis\.com)/  # the various hostnames, with wildcard subdomains
        (?:.*?\#/)?  # handle anchor (#/) redirect urls
        (?:  # the various things that can precede the ID:
            (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream))  # v/ or embed/ or e/ or shorts/
            |(?:  # or the v= param in all its forms
                (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                (?:\?|\#!?)  # the params delimiter ? or # or #!
                (?:.*?[&;])??  # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
                v=
            )
        ))
        |(?:
            youtu\.be|  # just youtu.be/xxxx
            vid\.plus|  # or vid.plus/xxxx
            zwearz\.com/watch|  # or zwearz.com/watch/xxxx
            %(invidious)s
        )/
        |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
        )
    )?  # all until now is optional -> you can pass the naked ID
    (?P<id>[0-9A-Za-z_-]{11})  # here is it! the YouTube video ID
    (?(1).+)?  # if we found the ID, everything can follow
    (?:\#|$)""" % {
    'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}

# Patterns that locate YouTube players embedded in arbitrary HTML. Every
# pattern exposes the embedded video URL through the named group `url`.
# (Presumably consumed by the base extractor's embed detection - confirm
# against the caller.)
_EMBED_REGEX = [
    # Generic embeds: an attribute or JS call that introduces a quoted
    # youtube(-nocookie).com /embed/, /v/ or /p/ URL. Group 1 captures the
    # opening quote so the same quote character terminates the URL.
    # The embedSWF alternative used to read `embedSWF\(?:\s*`, a mistyped
    # group that demanded a literal ':' and therefore never matched a real
    # `swfobject.embedSWF("...")` call; it now accepts the opening
    # parenthesis followed by optional whitespace.
    r'''(?x)
        (?:
            <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''',
    # https://wordpress.org/plugins/lazy-load-for-videos/
    r'''(?xs)
        <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
        \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
]

1056

_RETURN_TYPE = 'video'  # XXX: How to handle multifeed?

# Regexes applied to a player JavaScript URL; each one captures the player
# identifier in the named group `id`:
#   1. current layout:  .../s/player/<id>/player...
#   2. older layout:    .../<id>/player_ias.vflset[/<locale>]/base.js, or the
#                       "-plasma-ias-(phone|tablet)-<locale>.vflset" variant
#   3. legacy layout:   a "vfl..." token somewhere before a trailing ".js"
# (Presumably tried in this order by the caller - confirm at the use site.)
# NOTE: blame line numbers that had been injected between the elements, and
# that made the tuple unparseable, are removed.
_PLAYER_INFO_RE = (
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)

1063

# Static per-format metadata keyed by itag (as a string). Each value lists the
# fields known for that itag (container/extension, dimensions, codecs, audio
# bitrate, fps, ...). 3D and HLS variants carry a negative 'preference'.
# (Presumably merged into the extracted format dicts elsewhere in the class -
# confirm at the use site.)
# NOTE: blame line numbers that had been injected between the entries, and
# that made the dict unparseable, are removed; the entries are unchanged.
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # Dash mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # Dash webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # Dash webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # Dash webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
    '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
    '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
    '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
    '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}

1172

# Subtitle format identifiers known to this extractor.
# NOTE(review): presumably the formats requested when building caption URLs,
# in this order - confirm at the use site.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# NOTE(review): looks like this turns off the base extractor's geo-bypass
# handling for this extractor - confirm against InfoExtractor.
_GEO_BYPASS = False

# Name under which this extractor is registered.
IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1184

'uploader': 'Philipp Hagemeister',

1185

'uploader_id': 'phihag',

1186

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1187

'channel': 'Philipp Hagemeister',

1188

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1189

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1190

'upload_date': '20121002',

1191

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1192

'categories': ['Science & Technology'],

1193

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1198

'playable_in_embed': True,

1199

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1200

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1205

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1210

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1215

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1216

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1217

'uploader': 'SET India',

1218

'uploader_id': 'setindia',

1219

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1220

'age_limit': 18,

1221

},

1222

'skip': 'Private video',

1223

},

1224

{

1225

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1226

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1231

'uploader': 'Philipp Hagemeister',

1232

'uploader_id': 'phihag',

1233

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1234

'channel': 'Philipp Hagemeister',

1235

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1236

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1237

'upload_date': '20121002',

1238

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1239

'categories': ['Science & Technology'],

1240

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1245

'playable_in_embed': True,

1246

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1247

'live_status': 'not_live',

1248

'age_limit': 0,

1249

'comment_count': int,

1250

'channel_follower_count': int

1251

},

1252

'params': {

1253

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1258

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1263

'uploader_id': '8KVIDEO',

1264

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1265

'description': '',

1266

'uploader': '8KVIDEO',

1267

'title': 'UHDTV TEST 8K VIDEO.mp4'

1268

},

1269

'params': {

1270

'youtube_include_dash_manifest': True,

1271

'format': '141',

1272

},

1273

'skip': 'format 141 not served anymore',

1274

},

1275

# DASH manifest with encrypted signature

1276

{

1277

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1282

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1283

'duration': 244,

1284

'uploader': 'AfrojackVEVO',

1285

'uploader_id': 'AfrojackVEVO',

1286

'upload_date': '20131011',

1287

'abr': 129.495,

1288

'like_count': int,

1289

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1290

'playable_in_embed': True,

1291

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1292

'view_count': int,

1293

'track': 'The Spark',

1294

'live_status': 'not_live',

1295

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1296

'channel': 'Afrojack',

1297

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1298

'tags': 'count:19',

1299

'availability': 'public',

1300

'categories': ['Music'],

1301

'age_limit': 0,

1302

'alt_title': 'The Spark',

1303

'channel_follower_count': int

1304

},

1305

'params': {

1306

'youtube_include_dash_manifest': True,

1307

'format': '141/bestaudio[ext=m4a]',

1308

},

1309

},

1310

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1311

{

1312

'note': 'Embed allowed age-gate video',

1313

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1318

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1319

'duration': 142,

1320

'uploader': 'The Witcher',

1321

'uploader_id': 'WitcherGame',

1322

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1323

'upload_date': '20140605',

1324

'age_limit': 18,

1325

'categories': ['Gaming'],

1326

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1327

'availability': 'needs_auth',

1328

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1329

'like_count': int,

1330

'channel': 'The Witcher',

1331

'live_status': 'not_live',

1332

'tags': 'count:17',

1333

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1334

'playable_in_embed': True,

1335

'view_count': int,

1336

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1341

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1346

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1347

'upload_date': '20200408',

1348

'uploader_id': 'FlyingKitty900',

1349

'uploader': 'FlyingKitty',

1350

'age_limit': 18,

1351

'availability': 'needs_auth',

1352

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1353

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1354

'channel': 'FlyingKitty',

1355

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1356

'view_count': int,

1357

'categories': ['Entertainment'],

1358

'live_status': 'not_live',

1359

'tags': ['Flyingkitty', 'godzilla 2'],

1360

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1361

'like_count': int,

1362

'duration': 177,

1363

'playable_in_embed': True,

1364

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1369

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1370

'info_dict': {

1371

'id': 'Tq92D6wQ1mg',

1372

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1373

'ext': 'mp4',

1374

'upload_date': '20191228',

1375

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1376

'uploader': 'Projekt Melody',

1377

'description': 'md5:17eccca93a786d51bc67646756894066',

1378

'age_limit': 18,

1379

'like_count': int,

1380

'availability': 'needs_auth',

1381

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1382

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1383

'view_count': int,

1384

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1385

'channel': 'Projekt Melody',

1386

'live_status': 'not_live',

1387

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1388

'playable_in_embed': True,

1389

'categories': ['Entertainment'],

1390

'duration': 106,

1391

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1392

'comment_count': int,

1393

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1398

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1403

'uploader': 'Herr Lurik',

1404

'uploader_id': 'st3in234',

1405

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1406

'upload_date': '20130730',

1407

'track': 'Such mich find mich',

1408

'age_limit': 0,

1409

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1410

'like_count': int,

1411

'playable_in_embed': False,

1412

'creator': 'OOMPH!',

1413

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1414

'view_count': int,

1415

'alt_title': 'Such mich find mich',

1416

'duration': 210,

1417

'channel': 'Herr Lurik',

1418

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1419

'categories': ['Music'],

1420

'availability': 'public',

1421

'uploader_url': 'http://www.youtube.com/user/st3in234',

1422

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1423

'live_status': 'not_live',

1424

'artist': 'OOMPH!',

1425

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1430

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1431

'only_matching': True,

1432

},

1433

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1434

# YouTube Red ad is not captured for creator

1435

{

1436

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1442

'uploader_id': 'deadmau5',

1443

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1444

'creator': 'deadmau5',

1445

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1446

'uploader': 'deadmau5',

1447

'title': 'Deadmau5 - Some Chords (HD)',

1448

'alt_title': 'Some Chords',

1449

'availability': 'public',

1450

'tags': 'count:14',

1451

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1452

'view_count': int,

1453

'live_status': 'not_live',

1454

'channel': 'deadmau5',

1455

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1456

'like_count': int,

1457

'track': 'Some Chords',

1458

'artist': 'deadmau5',

1459

'playable_in_embed': True,

1460

'age_limit': 0,

1461

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1462

'categories': ['Music'],

1463

'album': 'Some Chords',

1464

'channel_follower_count': int

1465

},

1466

'expected_warnings': [

1467

'DASH manifest missing',

1468

]

1469

},

1470

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1471

{

1472

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1478

'uploader_id': 'olympic',

1479

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1480

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1481

'uploader': 'Olympics',

1482

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1483

'like_count': int,

1484

'release_timestamp': 1343767800,

1485

'playable_in_embed': True,

1486

'categories': ['Sports'],

1487

'release_date': '20120731',

1488

'channel': 'Olympics',

1489

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1490

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1491

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1492

'age_limit': 0,

1493

'availability': 'public',

1494

'live_status': 'was_live',

1495

'view_count': int,

1496

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1497

'channel_follower_count': int

1498

},

1499

'params': {

1500

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1510

'duration': 85,

1511

'upload_date': '20110310',

1512

'uploader_id': 'AllenMeow',

1513

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1514

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1515

'uploader': '孫ᄋᄅ',

1516

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1517

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1522

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1523

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1524

'view_count': int,

1525

'categories': ['People & Blogs'],

1526

'like_count': int,

1527

'live_status': 'not_live',

1528

'availability': 'unlisted',

1529

'comment_count': int,

1530

'channel_follower_count': int

1531

},

1532

},

1533

# url_encoded_fmt_stream_map is empty string

1534

{

1535

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1540

'description': '',

1541

'upload_date': '20150404',

1542

'uploader_id': 'spbelect',

1543

'uploader': 'Наблюдатели Петербурга',

1544

},

1545

'params': {

1546

'skip_download': 'requires avconv',

1547

},

1548

'skip': 'This live event has ended.',

1549

},

1550

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1551

{

1552

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1557

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1558

'duration': 220,

1559

'upload_date': '20150625',

1560

'uploader_id': 'dorappi2000',

1561

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1562

'uploader': 'dorappi2000',

1563

'formats': 'mincount:31',

1564

},

1565

'skip': 'not actual anymore',

1566

},

1567

# DASH manifest with segment_list

1568

{

1569

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1570

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1575

'uploader': 'Airtek',

1576

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1577

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1578

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1579

},

1580

'params': {

1581

'youtube_include_dash_manifest': True,

1582

'format': '135', # bestvideo

1583

},

1584

'skip': 'This live event has ended.',

1585

},

1586

{

1587

# Multifeed videos (multiple cameras), URL can be of any Camera

1588

'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',

1589

'info_dict': {

1590

'id': 'zaPI8MvL8pg',

1591

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',

1592

'description': 'md5:563ccbc698b39298481ca3c571169519',

},

'playlist': [{

'info_dict': {

'id': 'j5yGuxZ8lLU',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',

1599

'uploader': 'WiiLikeToPlay',

1600

'description': 'md5:563ccbc698b39298481ca3c571169519',

1601

'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',

1602

'duration': 10120,

1603

'channel_follower_count': int,

1604

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1605

'availability': 'public',

1606

'playable_in_embed': True,

1607

'upload_date': '20131105',

1608

'uploader_id': 'WiiRikeToPray',

1609

'categories': ['Gaming'],

1610

'live_status': 'was_live',

1611

'tags': 'count:24',

1612

'release_timestamp': 1383701910,

1613

'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',

1614

'comment_count': int,

1615

'age_limit': 0,

1616

'like_count': int,

1617

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1618

'channel': 'WiiLikeToPlay',

1619

'view_count': int,

1620

'release_date': '20131106',

},

}, {

'info_dict': {

'id': 'zaPI8MvL8pg',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',

1627

'uploader_id': 'WiiRikeToPray',

1628

'availability': 'public',

1629

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1630

'channel': 'WiiLikeToPlay',

1631

'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',

1632

'channel_follower_count': int,

1633

'description': 'md5:563ccbc698b39298481ca3c571169519',

'duration': 10108,

'age_limit': 0,

'like_count': int,

'tags': 'count:24',

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1639

'uploader': 'WiiLikeToPlay',

1640

'release_timestamp': 1383701915,

1641

'comment_count': int,

1642

'upload_date': '20131105',

1643

'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',

1644

'release_date': '20131106',

1645

'playable_in_embed': True,

1646

'live_status': 'was_live',

1647

'categories': ['Gaming'],

'view_count': int,

},

}, {

'info_dict': {

'id': 'R7r3vfO7Hao',

'ext': 'mp4',

'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',

1655

'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',

1656

'channel_id': 'UCN2XePorRokPB9TEgRZpddg',

1657

'like_count': int,

1658

'availability': 'public',

1659

'playable_in_embed': True,

1660

'upload_date': '20131105',

1661

'description': 'md5:563ccbc698b39298481ca3c571169519',

1662

'uploader_id': 'WiiRikeToPray',

1663

'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',

1664

'channel_follower_count': int,

1665

'tags': 'count:24',

1666

'release_date': '20131106',

1667

'uploader': 'WiiLikeToPlay',

1668

'comment_count': int,

1669

'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',

1670

'channel': 'WiiLikeToPlay',

1671

'categories': ['Gaming'],

1672

'release_timestamp': 1383701914,

1673

'live_status': 'was_live',

'age_limit': 0,

'duration': 10128,

'view_count': int,

},

}],

'params': {'skip_download': True},

1680

},

1681

{

1682

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1683

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1684

'info_dict': {

1685

'id': 'gVfLd0zydlo',

1686

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1687

},

1688

'playlist_count': 2,

1689

'skip': 'Not multifeed anymore',

1690

},

1691

{

1692

'url': 'https://vid.plus/FlRa-iH7PGw',

1693

'only_matching': True,

1694

},

1695

{

1696

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1697

'only_matching': True,

1698

},

1699

{

1700

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1701

# Also tests cut-off URL expansion in video description (see

1702

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1703

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1704

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1709

'alt_title': 'Dark Walk',

1710

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1711

'duration': 133,

1712

'upload_date': '20151119',

1713

'uploader_id': 'IronSoulElf',

1714

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1715

'uploader': 'IronSoulElf',

1716

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1717

'track': 'Dark Walk',

1718

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1719

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1720

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1721

'categories': ['Film & Animation'],

1722

'view_count': int,

1723

'live_status': 'not_live',

1724

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1725

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1726

'tags': 'count:13',

1727

'availability': 'public',

1728

'channel': 'IronSoulElf',

1729

'playable_in_embed': True,

1730

'like_count': int,

1731

'age_limit': 0,

1732

'channel_follower_count': int

1733

},

1734

'params': {

1735

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1740

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1741

'only_matching': True,

1742

},

1743

{

1744

# Video with yt:stretch=17:0

1745

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1750

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1751

'upload_date': '20151107',

1752

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1753

'uploader': 'CH GAMER DROID',

1754

},

1755

'params': {

1756

'skip_download': True,

1757

},

1758

'skip': 'This video does not exist.',

1759

},

1760

{

1761

# Video with incomplete 'yt:stretch=16:'

1762

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1763

'only_matching': True,

1764

},

1765

{

1766

# Video licensed under Creative Commons

1767

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1772

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1773

'duration': 721,

1774

'upload_date': '20150128',

1775

'uploader_id': 'BerkmanCenter',

1776

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1777

'uploader': 'The Berkman Klein Center for Internet & Society',

1778

'license': 'Creative Commons Attribution license (reuse allowed)',

1779

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1780

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1781

'like_count': int,

1782

'age_limit': 0,

1783

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1784

'channel': 'The Berkman Klein Center for Internet & Society',

1785

'availability': 'public',

1786

'view_count': int,

1787

'categories': ['Education'],

1788

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1789

'live_status': 'not_live',

1790

'playable_in_embed': True,

1791

'comment_count': int,

1792

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

# Channel-like uploader_url

1801

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1806

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1807

'duration': 4060,

1808

'upload_date': '20151120',

1809

'uploader': 'Bernie Sanders',

1810

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1811

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1812

'license': 'Creative Commons Attribution license (reuse allowed)',

1813

'playable_in_embed': True,

1814

'tags': 'count:12',

1815

'like_count': int,

1816

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1817

'age_limit': 0,

1818

'availability': 'public',

1819

'categories': ['News & Politics'],

1820

'channel': 'Bernie Sanders',

1821

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1822

'view_count': int,

1823

'live_status': 'not_live',

1824

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1825

'comment_count': int,

1826

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1835

'only_matching': True,

1836

},

1837

{

1838

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1839

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1840

'only_matching': True,

1841

},

1842

{

1843

# Rental video preview

1844

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1849

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1850

'upload_date': '20150811',

1851

'uploader': 'FlixMatrix',

1852

'uploader_id': 'FlixMatrixKaravan',

1853

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1854

'license': 'Standard YouTube License',

1855

},

1856

'params': {

1857

'skip_download': True,

1858

},

1859

'skip': 'This video is not available.',

1860

},

1861

{

1862

# YouTube Red video with episode data

1863

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1868

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1869

'duration': 2085,

1870

'upload_date': '20170118',

1871

'uploader': 'Vsauce',

1872

'uploader_id': 'Vsauce',

1873

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1874

'series': 'Mind Field',

1875

'season_number': 1,

1876

'episode_number': 1,

1877

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1878

'tags': 'count:12',

1879

'view_count': int,

1880

'availability': 'public',

1881

'age_limit': 0,

1882

'channel': 'Vsauce',

1883

'episode': 'Episode 1',

1884

'categories': ['Entertainment'],

1885

'season': 'Season 1',

1886

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1887

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1888

'like_count': int,

1889

'playable_in_embed': True,

1890

'live_status': 'not_live',

1891

'channel_follower_count': int

1892

},

1893

'params': {

1894

'skip_download': True,

1895

},

1896

'expected_warnings': [

1897

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1902

# as inappropriate or offensive to some audiences.

1903

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1908

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1909

'duration': 965,

1910

'upload_date': '20140124',

1911

'uploader': 'New Century Foundation',

1912

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1913

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1914

},

1915

'params': {

1916

'skip_download': True,

1917

},

1918

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1923

'only_matching': True,

1924

},

1925

{

1926

# geo restricted to JP

1927

'url': 'sJL6WA-aGkQ',

1928

'only_matching': True,

1929

},

1930

{

1931

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1932

'only_matching': True,

1933

},

1934

{

1935

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1936

'only_matching': True,

1937

},

1938

{

1939

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1940

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1941

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1946

'only_matching': True,

1947

},

1948

{

1949

# Video with unsupported adaptive stream type formats

1950

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1955

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1956

'duration': 433,

1957

'upload_date': '20130923',

1958

'uploader': 'Amelia Putri Harwita',

1959

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1960

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1961

'formats': 'maxcount:10',

1962

},

1963

'params': {

1964

'skip_download': True,

1965

'youtube_include_dash_manifest': False,

1966

},

1967

'skip': 'not actual anymore',

1968

},

1969

{

1970

# Youtube Music Auto-generated description

1971

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1976

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1977

'upload_date': '20190312',

1978

'uploader': 'Stephen - Topic',

1979

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1980

'artist': 'Stephen',

1981

'track': 'Voyeur Girl',

1982

'album': 'it\'s too much love to know my dear',

1983

'release_date': '20190313',

1984

'release_year': 2019,

1985

'alt_title': 'Voyeur Girl',

1986

'view_count': int,

1987

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1988

'playable_in_embed': True,

1989

'like_count': int,

1990

'categories': ['Music'],

1991

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1992

'channel': 'Stephen',

1993

'availability': 'public',

1994

'creator': 'Stephen',

1995

'duration': 169,

1996

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1997

'age_limit': 0,

1998

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1999

'tags': 'count:11',

2000

'live_status': 'not_live',

2001

'channel_follower_count': int

2002

},

2003

'params': {

2004

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

2009

'only_matching': True,

2010

},

2011

{

2012

# invalid -> valid video id redirection

2013

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

2018

'description': 'md5:bf577a41da97918e94fa9798d9228825',

2019

'upload_date': '20090125',

2020

'uploader': 'Prochorowka',

2021

'uploader_id': 'Prochorowka',

2022

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

2023

'artist': 'Panjabi MC',

2024

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

2025

'album': 'Beware of the Boys (Mundian To Bach Ke)',

2026

},

2027

'params': {

2028

'skip_download': True,

2029

},

2030

'skip': 'Video unavailable',

2031

},

2032

{

2033

# empty description results in an empty string

2034

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

2041

'uploader_id': 'ElevageOrVert',

2042

'uploader': 'ElevageOrVert',

2043

'view_count': int,

2044

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

2045

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

2046

'like_count': int,

2047

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

2048

'tags': [],

2049

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

2050

'availability': 'public',

2051

'age_limit': 0,

2052

'categories': ['Pets & Animals'],

2053

'duration': 7,

2054

'playable_in_embed': True,

2055

'live_status': 'not_live',

2056

'channel': 'ElevageOrVert',

2057

'channel_follower_count': int

2058

},

2059

'params': {

2060

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

2065

# see [2] for an example with '};' inside ytInitialPlayerResponse

2066

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

2067

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

2068

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

2073

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

2074

'upload_date': '20130831',

2075

'uploader_id': 'kudvenkat',

2076

'uploader': 'kudvenkat',

2077

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

2078

'like_count': int,

2079

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

2080

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

2081

'live_status': 'not_live',

2082

'categories': ['Education'],

2083

'availability': 'public',

2084

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

2085

'tags': 'count:12',

2086

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

2091

'comment_count': int,

2092

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

2101

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

2102

'only_matching': True,

2103

},

2104

{

2105

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

2106

'only_matching': True,

2107

},

2108

{

2109

# https://github.com/ytdl-org/youtube-dl/pull/28094

2110

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

2116

'upload_date': '20141120',

2117

'uploader': 'The Cinematic Orchestra - Topic',

2118

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2119

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2120

'artist': 'The Cinematic Orchestra',

2121

'track': 'Burn Out',

2122

'album': 'Every Day',

2123

'like_count': int,

2124

'live_status': 'not_live',

2125

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2130

'creator': 'The Cinematic Orchestra',

2131

'channel': 'The Cinematic Orchestra',

2132

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

2133

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2134

'availability': 'public',

2135

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

2136

'categories': ['Music'],

2137

'playable_in_embed': True,

2138

'channel_follower_count': int

2139

},

2140

'params': {

2141

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

2146

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

2147

'only_matching': True,

2148

},

2149

{

2150

# controversial video, requires bpctr/contentCheckOk

2151

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

2156

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

2157

'uploader': 'CBS Mornings',

2158

'uploader_id': 'CBSThisMorning',

2159

'upload_date': '20140716',

2160

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

2161

'duration': 170,

2162

'categories': ['News & Politics'],

2163

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2164

'view_count': int,

2165

'channel': 'CBS Mornings',

2166

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2167

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2168

'age_limit': 18,

2169

'availability': 'needs_auth',

2170

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2171

'like_count': int,

2172

'live_status': 'not_live',

2173

'playable_in_embed': True,

2174

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2179

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2184

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2185

'upload_date': '20201120',

2186

'uploader': 'Walk around Japan',

2187

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2188

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2189

'duration': 1456,

2190

'categories': ['Travel & Events'],

2191

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2192

'view_count': int,

2193

'channel': 'Walk around Japan',

2194

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2195

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2196

'age_limit': 0,

2197

'availability': 'public',

2198

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2199

'live_status': 'not_live',

2200

'playable_in_embed': True,

2201

'channel_follower_count': int

2202

},

2203

'params': {

2204

'skip_download': True,

2205

},

2206

}, {

2207

# Has multiple audio streams

2208

'url': 'WaOKSUlf4TM',

2209

'only_matching': True

2210

}, {

2211

# Requires Premium: has format 141 when requested using YTM url

2212

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2213

'only_matching': True

2214

}, {

2215

# multiple subtitles with same lang_code

2216

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2217

'only_matching': True,

2218

}, {

2219

# Force use android client fallback

2220

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2221

'info_dict': {

2222

'id': 'YOelRv7fMxY',

2223

'title': 'DIGGING A SECRET TUNNEL Part 1',

2224

'ext': '3gp',

2225

'upload_date': '20210624',

2226

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2227

'uploader': 'colinfurze',

2228

'uploader_id': 'colinfurze',

2229

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2230

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2231

'duration': 596,

2232

'categories': ['Entertainment'],

2233

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2234

'view_count': int,

2235

'channel': 'colinfurze',

2236

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2237

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2238

'age_limit': 0,

2239

'availability': 'public',

2240

'like_count': int,

2241

'live_status': 'not_live',

2242

'playable_in_embed': True,

2243

'channel_follower_count': int,

'chapters': list,

},

'params': {

'format': '17', # 3gp format available on android

2248

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2253

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2254

'only_matching': True,

2255

'params': {

2256

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2261

'only_matching': True,

2262

}, {

2263

'note': 'Storyboards',

2264

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2270

'uploader_id': 'scishow',

2271

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2272

'upload_date': '20140324',

2273

'uploader': 'SciShow',

2274

'like_count': int,

2275

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2276

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2277

'view_count': int,

2278

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2279

'playable_in_embed': True,

2280

'tags': 'count:12',

2281

'uploader_url': 'http://www.youtube.com/user/scishow',

2282

'availability': 'public',

2283

'channel': 'SciShow',

2284

'live_status': 'not_live',

2285

'duration': 248,

2286

'categories': ['Education'],

2287

'age_limit': 0,

2288

'channel_follower_count': int,

2289

'chapters': list,

2290

}, 'params': {'format': 'mhtml', 'skip_download': True}

2291

}, {

2292

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2293

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2298

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2299

'uploader': 'Leon Nguyen',

2300

'uploader_id': 'VNSXIII',

2301

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2302

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2303

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2308

'tags': 'count:23',

2309

'playable_in_embed': True,

2310

'live_status': 'not_live',

2311

'upload_date': '20220103',

2312

'like_count': int,

2313

'availability': 'public',

2314

'channel': 'Leon Nguyen',

2315

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2316

'comment_count': int,

2317

'channel_follower_count': int

2318

}

2319

}, {

2320

# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date

2321

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2326

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2327

'uploader': 'Leon Nguyen',

2328

'uploader_id': 'VNSXIII',

2329

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2330

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2331

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2336

'tags': 'count:23',

2337

'playable_in_embed': True,

2338

'live_status': 'not_live',

2339

'upload_date': '20220102',

2340

'like_count': int,

2341

'availability': 'public',

2342

'channel': 'Leon Nguyen',

2343

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2344

'comment_count': int,

2345

'channel_follower_count': int

2346

},

2347

'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}

2348

}, {

2349

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2350

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2355

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2356

'uploader': 'Quackity',

2357

'uploader_id': 'QuackityHQ',

2358

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2359

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2360

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2365

'tags': 'count:26',

2366

'playable_in_embed': True,

2367

'live_status': 'not_live',

2368

'release_timestamp': 1641172509,

2369

'release_date': '20220103',

2370

'upload_date': '20220103',

2371

'like_count': int,

2372

'availability': 'public',

2373

'channel': 'Quackity',

2374

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2375

'channel_follower_count': int

2376

}

2377

},

2378

{ # continuous livestream. Microformat upload date should be preferred.

2379

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2380

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2381

'info_dict': {

2382

'id': 'kgx4WGK0oNU',

2383

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2384

'ext': 'mp4',

2385

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2386

'availability': 'public',

2387

'age_limit': 0,

2388

'release_timestamp': 1637975704,

2389

'upload_date': '20210619',

2390

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2391

'live_status': 'is_live',

2392

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2393

'uploader': '阿鲍Abao',

2394

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2395

'channel': 'Abao in Tokyo',

2396

'channel_follower_count': int,

2397

'release_date': '20211127',

2398

'tags': 'count:39',

2399

'categories': ['People & Blogs'],

2400

'like_count': int,

2401

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2402

'view_count': int,

2403

'playable_in_embed': True,

2404

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2405

'concurrent_view_count': int,

2406

},

2407

'params': {'skip_download': True}

2408

}, {

2409

# Story. Requires specific player params to work.

2410

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2415

'view_count': int,

2416

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2417

'upload_date': '20220526',

2418

'categories': ['Education'],

2419

'title': 'Story',

2420

'channel': 'IT\'S HISTORY',

2421

'description': '',

2422

'uploader_id': 'BlastfromthePast',

2423

'duration': 12,

2424

'uploader': 'IT\'S HISTORY',

2425

'playable_in_embed': True,

2426

'age_limit': 0,

2427

'live_status': 'not_live',

2428

'tags': [],

2429

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2430

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2431

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2432

},

2433

'skip': 'stories get removed after some period of time',

2434

}, {

2435

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2440

'upload_date': '20220323',

2441

'like_count': int,

2442

'availability': 'unlisted',

2443

'channel': 'nao20010128nao',

2444

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2445

'age_limit': 0,

2446

'uploader': 'nao20010128nao',

2447

'uploader_id': 'nao20010128nao',

2448

'categories': ['Music'],

2449

'view_count': int,

2450

'description': '',

2451

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2452

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2453

'live_status': 'not_live',

2454

'playable_in_embed': True,

2455

'channel_follower_count': int,

2456

'duration': 6,

2457

'tags': [],

2458

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

2459

}

2460

}, {

2461

# Prefer primary title+description language metadata by default

2462

# Do not prefer translated description if primary is empty

2463

'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',

'info_dict': {

'id': 'el3E4MbxRqQ',

'ext': 'mp4',

'title': 'dlp test video 2 - primary sv no desc',

2468

'description': '',

2469

'channel': 'cole-dlp-test-acc',

2470

'tags': [],

2471

'view_count': int,

2472

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2473

'like_count': int,

2474

'playable_in_embed': True,

2475

'availability': 'unlisted',

2476

'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',

2477

'age_limit': 0,

2478

'duration': 5,

2479

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2480

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2481

'live_status': 'not_live',

2482

'upload_date': '20220908',

2483

'categories': ['People & Blogs'],

2484

'uploader': 'cole-dlp-test-acc',

2485

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2486

},

2487

'params': {'skip_download': True}

2488

}, {

2489

# Extractor argument: prefer translated title+description

2490

'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',

'info_dict': {

'id': 'gHKT4uU8Zng',

'ext': 'mp4',

'channel': 'cole-dlp-test-acc',

2495

'tags': [],

2496

'duration': 5,

2497

'live_status': 'not_live',

2498

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2499

'upload_date': '20220728',

2500

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2501

'view_count': int,

2502

'categories': ['People & Blogs'],

2503

'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',

2504

'title': 'dlp test video title translated (fr)',

2505

'availability': 'public',

2506

'uploader': 'cole-dlp-test-acc',

2507

'age_limit': 0,

2508

'description': 'dlp test video description translated (fr)',

2509

'playable_in_embed': True,

2510

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2511

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2512

},

2513

'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},

2514

'expected_warnings': [r'Preferring "fr" translated fields'],

2515

}, {

2516

'note': '6 channel audio',

2517

'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',

2518

'only_matching': True,

2519

}, {

2520

'note': 'Multiple HLS formats with same itag',

2521

'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',

'info_dict': {

'id': 'kX3nB4PpJko',

'ext': 'mp4',

'categories': ['Entertainment'],

2526

'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',

2527

'uploader_url': 'http://www.youtube.com/user/MrBeast6000',

2528

'live_status': 'not_live',

2529

'duration': 937,

2530

'channel_follower_count': int,

2531

'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',

2532

'title': 'Last To Take Hand Off Jet, Keeps It!',

2533

'channel': 'MrBeast',

2534

'playable_in_embed': True,

2535

'view_count': int,

2536

'upload_date': '20221112',

2537

'uploader': 'MrBeast',

2538

'uploader_id': 'MrBeast6000',

2539

'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',

2540

'age_limit': 0,

2541

'availability': 'public',

2542

'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',

'like_count': int,

'tags': [],

},

'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},

}

]

# Test fixtures for third-party web pages that embed a YouTube video: each
# entry pairs the embedding page's URL with the metadata expected for the
# embedded video (note the 'id' / 'thumbnail' values refer to YouTube, not to
# the page host). Presumably consumed by the project's test harness -- confirm.
_WEBPAGE_TESTS = [

# YouTube <object> embed

2552

{

2553

'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',

2554

# NOTE(review): this digest is identical to the 'description' md5 below --
# confirm that a file checksum with the same value is really intended.
'md5': '873c81d308b979f0e23ee7e620b312a3',

'info_dict': {

'id': 'msN87y-iEx0',

'ext': 'mp4',

'title': 'Feynman: Mirrors FUN TO IMAGINE 6',

2559

'upload_date': '20080526',

2560

'description': 'md5:873c81d308b979f0e23ee7e620b312a3',

2561

'uploader': 'Christopher Sykes',

2562

'uploader_id': 'ChristopherJSykes',

2563

'age_limit': 0,

2564

'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],

2565

'channel_id': 'UCCeo--lls1vna5YJABWAcVA',

2566

'playable_in_embed': True,

2567

'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',

2568

# Values given as the type `int` (rather than a number) are counters whose
# exact value is not pinned by this fixture.
'like_count': int,

2569

'comment_count': int,

2570

'channel': 'Christopher Sykes',

2571

'live_status': 'not_live',

2572

'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',

2573

'availability': 'public',

2574

'duration': 195,

2575

'view_count': int,

2576

'categories': ['Science & Technology'],

2577

'channel_follower_count': int,

2578

'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',

2579

},

2580

'params': {

2581

'skip_download': True,

}

},

]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` value is rejected
    outright (returns ``False``); every other URL is judged by the parent
    class's ``suitable``.
    """
    # NOTE(review): parse_qs is also imported at module level; the
    # function-local import is kept exactly as in the original -- confirm
    # whether something relies on this method being self-contained before
    # removing it.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)

2594

2595

def __init__(self, *args, **kwargs):
    """Run the parent initialiser, then start with two empty caches.

    All positional and keyword arguments are forwarded unchanged to the
    parent class; afterwards ``_code_cache`` and ``_player_cache`` are each
    set to a fresh empty dict on the instance.
    """
    super().__init__(*args, **kwargs)
    self._code_cache, self._player_cache = {}, {}

2599

2600

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
# Attach a fragment source to every format flagged 'is_from_start'.
#
# Only formats with a truthy 'is_from_start' entry are processed; each of
# them is mutated in place (keys 'is_live', 'fragments' and, when live,
# 'protocol' are written). Nothing is returned.
#
# The two nested helpers share `formats`, `start_time` and `is_live` with
# this function through `nonlocal`, so a manifest refresh triggered for one
# format updates the state seen by all of them.

2601

# Serialises refreshes requested through mpd_feed().
lock = threading.Lock()

2602

# Time of the most recent (re)fetch; compared against `delay` below.
start_time = time.time()

2603

formats = [f for f in formats if f.get('is_from_start')]

2604

2605

# Re-download the player responses and rebuild `formats` / `is_live`, but
# only once more than `delay` seconds have passed since `start_time`.
# `format_id` is accepted but not used inside this helper.
def refetch_manifest(format_id, delay):

2606

nonlocal formats, start_time, is_live

2607

if time.time() <= start_time + delay:

2608

return

2609

2610

_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

2611

video_details = traverse_obj(

2612

prs, (..., 'videoDetails'), expected_type=dict, default=[])

2613

microformats = traverse_obj(

2614

prs, (..., 'microformat', 'playerMicroformatRenderer'),

2615

expected_type=dict, default=[])

2616

# Note: the refreshed `formats` is taken as returned by _list_formats();
# the 'is_from_start' filter applied at the top is not re-applied here.
_, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)

2617

is_live = live_status == 'is_live'

2618

start_time = time.time()

2619

2620

# Callback handed to _live_dash_fragments: refreshes the manifest if due,
# then looks up the entry whose 'format_id' matches.
def mpd_feed(format_id, delay):

2621

"""

2622

@returns (manifest_url, manifest_stream_number, is_live) or None

2623

"""

2624

with lock:

2625

refetch_manifest(format_id, delay)

2626

2627

f = next((f for f in formats if f['format_id'] == format_id), None)

2628

# No matching format: report why (stream ended vs. unexpected miss).
if not f:

2629

if not is_live:

2630

self.to_screen(f'{video_id}: Video is no longer live')

2631

else:

2632

self.report_warning(

2633

f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')

2634

return None

2635

return f['manifest_url'], f['manifest_stream_number'], is_live

2636

2637

for f in formats:

2638

f['is_live'] = is_live

2639

# The last bound argument is False while live, otherwise a copy of the
# format dict; it fills _live_dash_fragments' `manifestless_orig_fmt`
# parameter. The remaining `ctx` argument is supplied by whoever calls `gen`.
gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],

2640

live_start_time, mpd_feed, not is_live and f.copy())

2641

# Live: store the callable itself as 'fragments' and set the generator
# protocol. Not live: call it once with an empty ctx and wrap the result
# in a LazyList.
if is_live:

2642

f['fragments'] = gen

2643

f['protocol'] = 'http_dash_segments_generator'

2644

else:

2645

f['fragments'] = LazyList(gen({}))

2646

# NOTE(review): indentation is not preserved in this copy, so it cannot be
# told here whether this `del` belongs to the `else` branch or to the loop
# body -- confirm against the upstream file.
del f['is_from_start']

2647

2648

def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """Generate fragment dicts (``url``, ``fragment_count``) for a live DASH format.

    @param mpd_feed               callable(format_id, delay) returning
                                  (manifest_url, manifest_stream_number, is_live) or None
    @param manifestless_orig_fmt  format dict used instead of fetching the MPD
                                  when truthy; the loop then stops after one pass
    @param ctx                    dict; ``start`` and ``last_error`` are read from it

    The generator stops when the stream is reported as no longer live or when
    too many consecutive polls produced nothing (no_fragment_score > 30).
    """
    # FETCH_SPAN: seconds between polls; MAX_DURATION: 432000 s == 120 hours
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # The nested helper reads last_seq from this scope on its failure paths.
    # It must be bound before the first call, or a failed first MPD fetch
    # raises NameError instead of being retried.
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """Return (should_continue, last sequence number), refreshing the MPD if needed."""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 2
                return False, last_seq
            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # caught just below; used to restart the outer while loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2758

2759

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfgs, or None.

    Looks up ``PLAYER_JS_URL`` first and then the ``jsUrl`` entries under
    ``WEB_PLAYER_CONTEXT_CONFIGS``. *webpage* is accepted but not used here.
    """
    js_path = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    if js_path:
        return urljoin('https://www.youtube.com', js_path)
    return None

2766

2767

def _download_player_url(self, video_id, fatal=False):
    """Build the player ``base.js`` URL from the version named in the iframe API JS.

    Returns None when the iframe API cannot be downloaded or no player
    version is found in it (with ``fatal=False``).
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2776

2777

def _signature_cache_id(self, example_sig):

2778

""" Return a string representation of a signature """

2779

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2780

2781

@classmethod

2782

def _extract_player_info(cls, player_url):

2783

for player_re in cls._PLAYER_INFO_RE:

2784

id_m = re.search(player_re, player_url)

if id_m:

break

else:

raise ExtractorError('Cannot identify player %r' % player_url)

2789

return id_m.group('id')

2790

2791

def _load_player(self, video_id, player_url, fatal=True):
    """Return the player code for *player_url*, downloading it at most once per player id.

    Successfully downloaded code is kept in ``self._code_cache``; None is
    returned when nothing could be downloaded.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    js_code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if js_code:
        self._code_cache[player_id] = js_code
    return self._code_cache.get(player_id)

2801

2802

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that reorders a signature string like the player does.

    The reordering is stored as a list of source indices in the
    'youtube-sigfuncs' cache; it is computed from the player code only when
    no cached list is available.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not cache_spec:
        code = self._load_player(video_id, player_url)
        if code:
            sig_func = self._parse_sig_js(code)
            # Feed chr(0), chr(1), ... so each output char reveals its source index
            probe = ''.join(map(chr, range(len(example_sig))))
            cache_spec = [ord(c) for c in sig_func(probe)]
            self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    return lambda s: ''.join(s[i] for i in cache_spec)

2821

2822

def _print_sig_code(self, func, example_sig):

2823

if not self.get_param('youtube_print_sig_code'):

2824

return

2825

2826

def gen_sig_code(idxs):

2827

def _genslice(start, end, step):

2828

starts = '' if start == 0 else str(start)

2829

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2830

steps = '' if step == 1 else (':%d' % step)

2831

return f's[{starts}{ends}{steps}]'

2832

2833

step = None

2834

# Quelch pyflakes warnings - start will be set when step is set

2835

start = '(Never used)'

2836

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2841

step = None

2842

continue

2843

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2853

2854

test_string = ''.join(map(chr, range(len(example_sig))))

2855

cache_res = func(test_string)

2856

cache_spec = [ord(c) for c in cache_res]

2857

expr_code = ' + '.join(gen_sig_code(cache_spec))

2858

signature_id_tuple = '(%s)' % (

2859

', '.join(str(len(p)) for p in example_sig.split('.')))

2860

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2861

' return %s\n') % (signature_id_tuple, expr_code)

2862

self.to_screen('Extracted signature function:\n' + code)

2863

2864

def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return a Python wrapper.

    The returned callable takes the signature string and passes it to the
    interpreted JS function as its single argument.
    """
    # Literal parentheses are written as \( and \): a bare `$` here would be
    # an end-of-string anchor and the pattern could never match.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])

2887

2888

def _cached(self, func, *cache_id):

2889

def inner(*args, **kwargs):

2890

if cache_id not in self._player_cache:

2891

try:

2892

self._player_cache[cache_id] = func(*args, **kwargs)

2893

except ExtractorError as e:

2894

self._player_cache[cache_id] = e

2895

except Exception as e:

2896

self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)

2897

2898

ret = self._player_cache[cache_id]

2899

if isinstance(ret, Exception):

raise ret

return ret

return inner

def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    # One cached signature function per player URL and signature shape
    cache_key = ('sig', player_url, self._signature_cache_id(s))
    sig_func = self._cached(self._extract_signature_function, *cache_key)(video_id, player_url, s)
    self._print_sig_code(sig_func, s)
    return sig_func(s)

2911

2912

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature

    The native JS interpreter is tried first. If it raises
    JSInterpreter.Exception and a PhantomJS wrapper can be created, the
    function code is executed with PhantomJS instead.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as err:
        raise ExtractorError('Unable to extract nsig function code', cause=err)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        nsig_factory = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = nsig_factory(jsi, func_code)(s)
    except JSInterpreter.Exception as native_err:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # PhantomJS is not usable; surface the original interpreter error
            raise native_err
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_err, only_once=True)

        argnames, func_body = func_code
        ret = jsi.execute(
            f'console.log(function({", ".join(argnames)}) {{ {func_body} }}({s!r}));',
            video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret

2945

2946

def _extract_n_function_name(self, jscode):

2947

funcname, idx = self._search_regex(

2948

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

2949

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

if not idx:

return funcname

return json.loads(js_to_json(self._search_regex(

2954

rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,

2955

f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]

2956

2957

def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's nsig function.

    ``func_code`` is an (argument names, function body) pair. It is read
    from the 'youtube-nsig' cache when present; otherwise it is extracted
    from the player JS and stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because JS identifiers may contain `$`; literal
    # parentheses are written as \( and \).
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code

2982

2983

def _extract_n_function_from_code(self, jsi, func_code):

2984

func = jsi.extract_function_from_code(*func_code)

def extract_nsig(s):

try:

ret = func([s])

except JSInterpreter.Exception:

2990

raise

2991

except Exception as e:

2992

raise JSInterpreter.Exception(traceback.format_exc(), cause=e)

2993

2994

if ret.startswith('enhanced_except_'):

2995

raise JSInterpreter.Exception('Signature function returned an exception')

return ret

return extract_nsig

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    Taken from ``ytcfg['STS']`` when available, otherwise searched for in
    the player code. Returns None when it cannot be determined.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if code:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts

3023

3024

def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs from the player responses.

    Stops with a warning as soon as one of the two URLs is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        # 'el': otherwise defaults to "shorts"
        qs.update(ver=['2'], cpn=[cpn], cmt=video_length, el='detailpage')

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs.update(st=0, et=video_length)

        url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

3064

3065

@classmethod

3066

def _extract_from_webpage(cls, url, webpage):

3067

# Invidious Instances

3068

# https://github.com/yt-dlp/yt-dlp/issues/195

3069

# https://github.com/iv-org/invidious/pull/1730

3070

mobj = re.search(

3071

r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',

3072

webpage)

3073

if mobj:

3074

yield cls.url_result(mobj.group('url'), cls)

3075

raise cls.StopExtraction()

3076

3077

yield from super()._extract_from_webpage(url, webpage)

3078

3079

# lazyYT YouTube embed

3080

for id_ in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):

3081

yield cls.url_result(unescapeHTML(id_), cls, id_)

3082

3083

# Wordpress "YouTube Video Importer" plugin

3084

for m in re.findall(r'''(?x)<div[^>]+

3085

class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+

3086

data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):

3087

yield cls.url_result(m[-1], cls, m[-1])

3088

3089

@classmethod
def extract_id(cls, url):
    """Return the video id that ``get_temp_id`` finds for *url*.

    Raises ExtractorError when no id is found.
    """
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

3095

3096

def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in *data*."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # timeRangeStartMillis is divided by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)

3110

3111

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the first non-empty chapter list built from the engagement panels.

    Returns an empty list when no panel produces any chapters.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

3123

3124

def _extract_chapters_from_description(self, description, duration):
    """Extract chapters from description lines of the form "time title" or "title time".

    The "time title" layout is tried first; the reversed layout is only
    tried when the first attempt yields nothing.
    """
    duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
    sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
    text = description or ''

    time_first = self._extract_chapters(
        re.findall(sep_re % (duration_re, r'.+?'), text),
        chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
        duration=duration, strict=False)
    if time_first:
        return time_first

    return self._extract_chapters(
        re.findall(sep_re % (r'.+?', duration_re), text),
        chapter_time=lambda x: parse_duration(x[1]), chapter_title=lambda x: x[0],
        duration=duration, strict=False)

3134

3135

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

3140

'title': chapter_title(chapter),

3141

} for chapter in chapter_list or []]

3142

if not strict:

3143

chapter_list.sort(key=lambda c: c['start_time'] or 0)

3144

3145

chapters = [{'start_time': 0}]

3146

for idx, chapter in enumerate(chapter_list):

3147

if chapter['start_time'] is None:

3148

self.report_warning(f'Incomplete chapter {idx}')

3149

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

3150

chapters.append(chapter)

3151

elif chapter not in chapters:

3152

self.report_warning(

3153

f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')

3154

return chapters[1:]

3155

3156

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no ``commentId``. ``parent`` falls
    back to 'root' when not given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    timestamp = self._parse_time_text(time_text)

    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    vote_getters = (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount'])
    like_count = parse_count(try_get(comment_renderer, vote_getters, str)) or 0
    author_thumbnail = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': like_count,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }

3192

3193

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Yield comment dicts by following continuations page by page.

    Calls itself for the reply thread of each comment, passing the comment
    id as *parent* and sharing *tracker*, the dict of running counters.
    Raises ``self.CommentsDisabled`` when a message renderer is present and
    no comment was produced for a top-level call.
    """

    config_arg = lambda name: self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Return the continuation of the selected sort order, if any."""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments of one page, each followed by its replies."""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the caller to stop
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                reply_entries = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                yield from itertools.islice(reply_entries, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
                self.report_warning(
                    'Received incomplete data for a comment reply thread and retrying did not help. '
                    'Ignoring to let other comments be downloaded.')
            else:
                raise
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled

3349

3350

@staticmethod

3351

def _generate_comment_continuation(video_id):

3352

"""

3353

Generates initial comment section continuation token from given video id

3354

"""

3355

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

3356

return base64.b64encode(token.encode()).decode()

3357

3358

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments of the 'comment-item-section'
    item section, cut off after the first ``max_comments`` value if one is
    configured. *webpage* is accepted but not used here.
    """
    def _real_comment_extract(contents):
        sections = traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
        renderer = next((
            section for section in sections
            if section.get('sectionIdentifier') == 'comment-item-section'), None)
        yield from self._comment_entries(renderer, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, limit)

3368

3369

@staticmethod

3370

def _get_checkok_params():

3371

return {'contentCheckOk': True, 'racyCheckOk': True}

3372

3373

@classmethod

3374

def _generate_player_context(cls, sts=None):

3375

context = {

3376

'html5Preference': 'HTML5_PREF_WANTS',

3377

}

3378

if sts is not None:

3379

context['signatureTimestamp'] = sts

3380

return {

3381

'playbackContext': {

3382

'contentPlaybackContext': context

3383

},

3384

**cls._get_checkok_params()

}

@staticmethod
def _is_agegated(player_response):
    """
    Tell whether a player response indicates an age gate.

    True when the playability status has a 'desktopLegacyAgeGateReason', or
    when its 'status'/'reason' values contain one of the known markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    candidates = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for marker in AGE_GATE_REASONS:
        for candidate in candidates:
            if marker in candidate:
                return True
    return False

3398

3399

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of a player response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3402

3403

# Opaque value sent as `params` in player API queries (for stories and the android client)
# and as the `pp` webpage query parameter when the video is a story
_STORY_PLAYER_PARAMS = '8AEB'

3404

3405

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):

3406

3407

session_index = self._extract_session_index(player_ytcfg, master_ytcfg)

3408

syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

3409

sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None

3410

headers = self.generate_api_headers(

3411

ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

yt_query = {

'videoId': video_id,

}

if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':

3417

yt_query['params'] = self._STORY_PLAYER_PARAMS

3418

3419

yt_query.update(self._generate_player_context(sts))

3420

return self._extract_response(

3421

item_id=video_id, ep='player', query=yt_query,

3422

ytcfg=player_ytcfg, headers=headers, fatal=True,

3423

default_client=client,

3424

note='Downloading %s player API JSON' % client.replace('_', ' ').strip()

3425

) or None

3426

3427

def _get_requested_clients(self, url, smuggled_data):
    """
    Work out which innertube clients to query, in order and without duplicates.

    Clients come from the "player_client" extractor argument; 'default' and
    'all' are expanded, unknown names are skipped with a warning, and the
    defaults are used when nothing valid was requested. For music URLs the
    "<client>_music" variant of every requested client is added when it exists.

    @param url            URL being extracted
    @param smuggled_data  Smuggled data dict; 'is_music_url' forces the music variants
    """
    requested_clients = []
    default = ['android', 'web']
    # Names starting with "_" cannot be requested explicitly
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions before extending: feeding extend() with a generator
        # over the same list would make it iterate over its own new entries
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3450

3451

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect the player responses of the given innertube clients.

    The initial player response found in the webpage (if any) is always
    included with its streaming data blanked out. Further clients may get
    queued while iterating when a response is unplayable or age-gated.

    @returns  Tuple (list of player responses, player URL or None)
    @raises   The last ExtractorError, if no player response was obtained at all
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    seen_clients = set(clients)
    # Reversed so that list.pop() hands the clients out in the requested order
    pending = clients[::-1]
    responses = []

    def append_client(*client_names):
        """ Queue the first client name that exists but is not already used """
        for name in client_names:
            actual_client = _split_innertube_client(name)[0]
            if actual_client in INNERTUBE_CLIENTS and actual_client not in seen_clients:
                pending.append(name)
                seen_clients.add(actual_client)
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        responses.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client, base_client, variant = _split_innertube_client(pending.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe fallback is attempted at most once per video
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as err:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = err
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                responses.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not responses:
            raise last_error
        self.report_warning(last_error)
    return responses, player_url

3532

3533

def _needs_live_processing(self, live_status, duration):

3534

if (live_status == 'is_live' and self.get_param('live_from_start')

3535

or live_status == 'post_live' and (duration or 0) > 4 * 3600):

3536

return live_status

3537

3538

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """
    Generate the format dicts of a video, followed by one subtitles dict.

    Yields a dict for every usable entry under "formats"/"adaptiveFormats" of
    the streaming data, then the formats of the HLS and DASH manifests unless
    those are skipped. The last item yielded is always the subtitles mapping
    merged from the manifests, so callers have to split it off.

    @param streaming_data  List of "streamingData" dicts
    @param video_id        Video id, used in messages and for signature decryption
    @param player_url      URL of the JS player; if falsy, formats needing a
                           signature are dropped and nsig is left undecrypted
    @param live_status     Live status string such as 'is_live', or None
    @param duration        Duration in seconds, or None if unknown
    """
    itags, stream_ids = collections.defaultdict(set), []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may exist once per audio track, so both identify a stream
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null value
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is still yielded, only marked and deprioritized
                throttled = True

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality is None when the format declares neither quality nor audioQuality
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'

        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, itag):
        """Set format_id and quality of a manifest format; False if it duplicates a known one"""
        key = (proto, f.get('language'))
        if key in itags[itag]:
            return False
        itags[itag].add(key)

        # Disambiguate the id when the same itag is available over several protocols
        if any(p != proto for p, _ in itags[itag]):
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality of the known format with the closest height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    yield subtitles

def _extract_storyboard(self, player_responses, duration):
    """
    Generate one 'mhtml' storyboard format per level of the storyboard spec.

    The spec is a "|"-separated string: a base URL followed by one
    "#"-separated entry per storyboard level. Malformed entries are skipped
    with a warning.

    @param player_responses  Player responses to take the first storyboard spec from
    @param duration          Video duration in seconds. Nothing is yielded when it is
                             unknown or zero, since frame rate and fragment
                             durations are derived from it
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # After reversing, the base URL is the last element
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Avoids TypeError/ZeroDivisionError in the divisions below
        return
    last_level = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # spec was reversed, hence the level number counts down
        url = base_url.replace('$L', str(last_level - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding cols * rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be shorter than the others
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3781

3782

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):

3783

webpage = None

3784

if 'webpage' not in self._configuration_arg('player_skip'):

3785

query = {'bpctr': '9999999999', 'has_verified': '1'}

3786

if smuggled_data.get('is_story'):

3787

query['pp'] = self._STORY_PLAYER_PARAMS

3788

webpage = self._download_webpage(

3789

webpage_url, video_id, fatal=False, query=query)

3790

3791

master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

3792

3793

player_responses, player_url = self._extract_player_responses(

3794

self._get_requested_clients(url, smuggled_data),

3795

video_id, webpage, master_ytcfg, smuggled_data)

3796

3797

return webpage, master_ytcfg, player_responses, player_url

3798

3799

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """
    Determine the live status of a video and extract its formats and subtitles.

    @returns  Tuple (live broadcast details, live status, streaming data,
              list of formats, subtitles dict)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The first matching state wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator yields the formats first and the subtitles dict last
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    return live_broadcast_details, live_status, streaming_data, formats, subtitles

3817

3818

def _real_extract(self, url):

3819

url, smuggled_data = unsmuggle_url(url, {})

3820

video_id = self._match_id(url)

3821

3822

base_url = self.http_scheme() + '//www.youtube.com/'

3823

webpage_url = base_url + 'watch?v=' + video_id

3824

3825

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3826

3827

playability_statuses = traverse_obj(

3828

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3829

3830

trailer_video_id = get_first(

3831

playability_statuses,

3832

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3833

expected_type=str)

3834

if trailer_video_id:

3835

return self.url_result(

3836

trailer_video_id, self.ie_key(), trailer_video_id)

3837

3838

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3839

if webpage else (lambda x: None))

3840

3841

video_details = traverse_obj(

3842

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3843

microformats = traverse_obj(

3844

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3845

expected_type=dict, default=[])

3846

3847

translated_title = self._get_text(microformats, (..., 'title'))

3848

video_title = (self._preferred_lang and translated_title

3849

or get_first(video_details, 'title') # primary

3850

or translated_title

3851

or search_meta(['og:title', 'twitter:title', 'title']))

3852

translated_description = self._get_text(microformats, (..., 'description'))

3853

original_description = get_first(video_details, 'shortDescription')

3854

video_description = (

3855

self._preferred_lang and translated_description

3856

# If original description is blank, it will be an empty string.

3857

# Do not prefer translated description in this case.

3858

or original_description if original_description is not None else translated_description)

3859

3860

multifeed_metadata_list = get_first(

3861

player_responses,

3862

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3863

expected_type=str)

3864

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3865

if self.get_param('noplaylist'):

3866

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3871

# Unquote should take place before split on comma (,) since textual

3872

# fields may contain comma as well (see

3873

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3874

feed_data = urllib.parse.parse_qs(

3875

urllib.parse.unquote_plus(feed))

3876

3877

def feed_entry(name):

3878

return try_get(

3879

feed_data, lambda x: x[name][0], str)

3880

3881

feed_id = feed_entry('id')

3882

if not feed_id:

3883

continue

3884

feed_title = feed_entry('title')

3885

title = video_title

3886

if feed_title:

3887

title += ' (%s)' % feed_title

3888

entries.append({

3889

'_type': 'url_transparent',

3890

'ie_key': 'Youtube',

3891

'url': smuggle_url(

3892

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3893

{'force_singlefeed': True}),

3894

'title': title,

3895

})

3896

feed_ids.append(feed_id)

3897

self.to_screen(

3898

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3899

% (', '.join(feed_ids), video_id))

3900

return self.playlist_result(

3901

entries, video_id, video_title, video_description)

3902

3903

duration = (int_or_none(get_first(video_details, 'lengthSeconds'))

3904

or int_or_none(get_first(microformats, 'lengthSeconds'))

3905

or parse_duration(search_meta('duration')) or None)

3906

3907

live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \

3908

self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

3909

if live_status == 'post_live':

3910

self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

3911

3912

if not formats:

3913

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3914

self.report_drm(video_id)

3915

pemr = get_first(

3916

playability_statuses,

3917

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3918

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3919

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3920

if subreason:

3921

if subreason == 'The uploader has not made this video available in your country.':

3922

countries = get_first(microformats, 'availableCountries')

3923

if not countries:

3924

regions_allowed = search_meta('regionsAllowed')

3925

countries = regions_allowed.split(',') if regions_allowed else None

3926

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3927

reason += f'. {subreason}'

3928

if reason:

3929

self.raise_no_formats(reason, expected=True)

3930

3931

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3932

if not keywords and webpage:

3933

keywords = [

3934

unescapeHTML(m.group('content'))

3935

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3936

for keyword in keywords:

3937

if keyword.startswith('yt:stretch='):

3938

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3939

if mobj:

3940

# NB: float is intentional for forcing float division

3941

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3946

f['stretched_ratio'] = ratio

3947

break

3948

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3949

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3950

if thumbnail_url:

3951

thumbnails.append({

3952

'url': thumbnail_url,

3953

})

3954

original_thumbnails = thumbnails.copy()

3955

3956

# The best resolution thumbnails sometimes does not appear in the webpage

3957

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3958

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3959

thumbnail_names = [

3960

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

3961

# in resolution, these are not the custom thumbnail. So de-prioritize them

3962

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3963

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3964

]

3965

n_thumbnail_names = len(thumbnail_names)

3966

thumbnails.extend({

3967

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3968

video_id=video_id, name=name, ext=ext,

3969

webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),

3970

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3971

for thumb in thumbnails:

3972

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3973

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3974

self._remove_duplicate_formats(thumbnails)

3975

self._downloader._sort_thumbnails(original_thumbnails)

3976

3977

category = get_first(microformats, 'category') or search_meta('genre')

3978

channel_id = str_or_none(

3979

get_first(video_details, 'channelId')

3980

or get_first(microformats, 'externalChannelId')

3981

or search_meta('channelId'))

3982

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3983

3984

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3985

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3986

if not duration and live_end_time and live_start_time:

3987

duration = live_end_time - live_start_time

3988

3989

needs_live_processing = self._needs_live_processing(live_status, duration)

3990

3991

def is_bad_format(fmt):

3992

if needs_live_processing and not fmt.get('is_from_start'):

3993

return True

3994

elif (live_status == 'is_live' and needs_live_processing != 'is_live'

3995

and fmt.get('protocol') == 'http_dash_segments'):

3996

return True

3997

3998

for fmt in filter(is_bad_format, formats):

3999

fmt['preference'] = (fmt.get('preference') or -1) - 10

4000

fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')

4001

4002

if needs_live_processing:

4003

self._prepare_live_from_start_formats(

4004

formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

4005

4006

formats.extend(self._extract_storyboard(player_responses, duration))

info = {

'id': video_id,

'title': video_title,

4011

'formats': formats,

4012

'thumbnails': thumbnails,

4013

# The best thumbnail that we are sure exists. Prevents unnecessary

4014

# URL checking if user don't care about getting the best possible thumbnail

4015

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

4016

'description': video_description,

4017

'uploader': get_first(video_details, 'author'),

4018

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

4019

'uploader_url': owner_profile_url,

4020

'channel_id': channel_id,

4021

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

4022

'duration': duration,

4023

'view_count': int_or_none(

4024

get_first((video_details, microformats), (..., 'viewCount'))

4025

or search_meta('interactionCount')),

4026

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

4027

'age_limit': 18 if (

4028

get_first(microformats, 'isFamilySafe') is False

4029

or search_meta('isFamilyFriendly') == 'false'

4030

or search_meta('og:restrictions:age') == '18+') else 0,

4031

'webpage_url': webpage_url,

4032

'categories': [category] if category else None,

4033

'tags': keywords,

4034

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

4035

'live_status': live_status,

4036

'release_timestamp': live_start_time,

4037

'_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats

4038

'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')

}

subtitles = {}

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

4043

if pctr:

4044

def get_lang_code(track):

4045

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

4046

or track.get('languageCode'))

4047

4048

# Converted into dicts to remove duplicates

4049

captions = {

4050

get_lang_code(sub): sub

4051

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

4052

translation_languages = {

4053

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

4054

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

4055

4056

def process_language(container, base_url, lang_code, sub_name, query):

4057

lang_subs = container.setdefault(lang_code, [])

4058

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

# NB: Constructing the full subtitle dictionary is slow

4069

get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (

4070

self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

4071

for lang_code, caption_track in captions.items():

4072

base_url = caption_track.get('baseUrl')

4073

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

4074

if not base_url:

4075

continue

4076

lang_name = self._get_text(caption_track, 'name', max_runs=1)

4077

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

4082

if not caption_track.get('isTranslatable'):

4083

continue

4084

for trans_code, trans_name in translation_languages.items():

4085

if not trans_code:

4086

continue

4087

orig_trans_code = trans_code

4088

if caption_track.get('kind') != 'asr' and trans_code != 'und':

4089

if not get_translated_subs:

4090

continue

4091

trans_code += f'-{lang_code}'

4092

trans_name += format_field(lang_name, None, ' from %s')

4093

# Add an "-orig" label to the original language so that it can be distinguished.

4094

# The subs are returned without "-orig" as well for compatibility

4095

if lang_code == f'a-{orig_trans_code}':

4096

process_language(

4097

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

4098

# Setting tlang=lang returns damaged subtitles.

4099

process_language(automatic_captions, base_url, trans_code, trans_name,

4100

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

4101

4102

info['automatic_captions'] = automatic_captions

4103

info['subtitles'] = subtitles

4104

4105

parsed_url = urllib.parse.urlparse(url)

4106

for component in [parsed_url.fragment, parsed_url.query]:

4107

query = urllib.parse.parse_qs(component)

4108

for k, v in query.items():

4109

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

4110

d_k += '_time'

4111

if d_k not in info and k in s_ks:

4112

info[d_k] = parse_duration(query[k][0])

4113

4114

# Youtube Music Auto-generated description

4115

if video_description:

4116

mobj = re.search(

4117

r'''(?xs)

4118

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

4119

(?P<album>[^\n]+)

4120

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

4121

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

4122

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

4123

.+\nAuto-generated\ by\ YouTube\.\s*$

4124

''', video_description)

4125

if mobj:

4126

release_year = mobj.group('release_year')

4127

release_date = mobj.group('release_date')

4128

if release_date:

4129

release_date = release_date.replace('-', '')

4130

if not release_year:

4131

release_year = release_date[:4]

4132

info.update({

4133

'album': mobj.group('album'.strip()),

4134

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

4135

'track': mobj.group('track').strip(),

4136

'release_date': release_date,

4137

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

4143

if not initial_data:

4144

query = {'videoId': video_id}

4145

query.update(self._get_checkok_params())

4146

initial_data = self._extract_response(

4147

item_id=video_id, ep='next', fatal=False,

4148

ytcfg=master_ytcfg, query=query,

4149

headers=self.generate_api_headers(ytcfg=master_ytcfg),

4150

note='Downloading initial data API JSON')

4151

4152

info['comment_count'] = traverse_obj(initial_data, (

4153

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

4154

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

4155

), (

4156

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

4157

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

4158

), expected_type=int_or_none, get_all=False)

4159

4160

try: # This will error if there is no livechat

4161

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

4162

except (KeyError, IndexError, TypeError):

4163

pass

4164

else:

4165

info.setdefault('subtitles', {})['live_chat'] = [{

4166

# url is needed to set cookies

4167

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

4168

'video_id': video_id,

4169

'ext': 'json',

4170

'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')

4171

else 'youtube_live_chat_replay'),

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

4177

or self._extract_chapters_from_engagement_panel(initial_data, duration)

4178

or self._extract_chapters_from_description(video_description, duration)

4179

or None)

4180

4181

contents = traverse_obj(

4182

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

4183

expected_type=list, default=[])

4184

4185

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

4186

if vpir:

4187

stl = vpir.get('superTitleLink')

4188

if stl:

4189

stl = self._get_text(stl)

4190

if try_get(

4191

vpir,

4192

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

4193

info['location'] = stl

4194

else:

4195

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

4196

if mobj:

4197

info.update({

4198

'series': mobj.group(1),

4199

'season_number': int(mobj.group(2)),

4200

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

list) or []):

tbrs = variadic(

traverse_obj(

tlb, 'toggleButtonRenderer',

4209

('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),

4210

default=[]))

4211

for tbr in tbrs:

4212

for getter, regex in [(

4213

lambda x: x['defaultText']['accessibility']['accessibilityData'],

4214

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

4215

lambda x: x['accessibility'],

4216

lambda x: x['accessibilityData']['accessibilityData'],

4217

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

4218

label = (try_get(tbr, getter, dict) or {}).get('label')

4219

if label:

4220

mobj = re.match(regex, label)

4221

if mobj:

4222

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

4223

break

4224

sbr_tooltip = try_get(

4225

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

4226

if sbr_tooltip:

4227

like_count, dislike_count = sbr_tooltip.split(' / ')

4228

info.update({

4229

'like_count': str_to_int(like_count),

4230

'dislike_count': str_to_int(dislike_count),

4231

})

4232

vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))

4233

if vcr:

4234

vc = self._get_count(vcr, 'viewCount')

4235

# Upcoming premieres with waiting count are treated as live here

4236

if vcr.get('isLive'):

4237

info['concurrent_view_count'] = vc

4238

elif info.get('view_count') is None:

4239

info['view_count'] = vc

4240

4241

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

4242

if vsir:

4243

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

4244

info.update({

4245

'channel': self._get_text(vor, 'title'),

4246

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

4251

list) or []

4252

multiple_songs = False

4253

for row in rows:

4254

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

4255

multiple_songs = True

4256

break

4257

for row in rows:

4258

mrr = row.get('metadataRowRenderer') or {}

4259

mrr_title = mrr.get('title')

4260

if not mrr_title:

4261

continue

4262

mrr_title = self._get_text(mrr, 'title')

4263

mrr_contents_text = self._get_text(mrr, ('contents', 0))

4264

if mrr_title == 'License':

4265

info['license'] = mrr_contents_text

4266

elif not multiple_songs:

4267

if mrr_title == 'Album':

4268

info['album'] = mrr_contents_text

4269

elif mrr_title == 'Artist':

4270

info['artist'] = mrr_contents_text

4271

elif mrr_title == 'Song':

4272

info['track'] = mrr_contents_text

4273

4274

fallbacks = {

4275

'channel': 'uploader',

4276

'channel_id': 'uploader_id',

4277

'channel_url': 'uploader_url',

4278

}

4279

4280

# The upload date for scheduled, live and past live streams / premieres in microformats

4281

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

4282

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

4283

upload_date = (

4284

unified_strdate(get_first(microformats, 'uploadDate'))

4285

or unified_strdate(search_meta('uploadDate')))

4286

if not upload_date or (

4287

live_status in ('not_live', None)

4288

and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])

4289

):

4290

upload_date = strftime_or_none(

4291

self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date

4292

info['upload_date'] = upload_date

4293

4294

for to, frm in fallbacks.items():

4295

if not info.get(to):

4296

info[to] = info.get(frm)

4297

4298

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))

4304

4305

is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)

4306

or get_first(video_details, 'isPrivate', expected_type=bool))

4307

4308

info['availability'] = (

4309

'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)

4310

else self._availability(

4311

is_private=is_private,

4312

needs_premium=(

4313

self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)

4314

or False if initial_data and is_private is not None else None),

4315

needs_subscription=(

4316

self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)

4317

or False if initial_data and is_private is not None else None),

4318

needs_auth=info['age_limit'] >= 18,

4319

is_unlisted=None if is_private is None else (

4320

self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)

4321

or get_first(microformats, 'isUnlisted', expected_type=bool))))

4322

4323

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

4324

4325

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

4331

@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that forwards smuggled data from the input URL to the results.

    The wrapped function is called as ``func(self, url, smuggled_data)`` with
    the unsmuggled URL. If any smuggled data is present afterwards, it is
    re-attached to the returned info dict and, lazily, to each of its entries.
    """

    def _reattach(result, payload):
        # Only URL-type results carry a URL that smuggled data can be attached to
        if result.get('_type') not in ('url', 'url_transparent'):
            return result
        if payload.get('is_music_url'):
            parts = urllib.parse.urlparse(result['url'])
            if parts.netloc in ('www.youtube.com', 'music.youtube.com'):
                # The flag is expressed through the hostname instead of being smuggled
                payload.pop('is_music_url')
                result['url'] = urllib.parse.urlunparse(parts._replace(netloc='music.youtube.com'))
        if payload:
            result['url'] = smuggle_url(result['url'], payload)
        return result

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = func(self, url, smuggled_data)
        if not smuggled_data:
            return info_dict
        _reattach(info_dict, smuggled_data)
        if info_dict.get('entries'):
            # Every entry gets its own copy because _reattach() may pop keys from it
            info_dict['entries'] = (
                _reattach(entry, smuggled_data.copy()) for entry in info_dict['entries'])
        return info_dict

    return wrapper

def _extract_channel_id(self, webpage):
    """Return the channel ID found in the meta tags of `webpage`.

    The `channelId` meta tag is used when present. Otherwise the ID is parsed
    out of one of the URL meta tags (`og:url`, `al:*:url`, `twitter:*`), which
    must hold a `youtube.com/channel/<id>` URL.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: with `(...)+` a repeated
    # group only keeps its last repetition, i.e. the last character of the ID
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')

4370

4371

@staticmethod

4372

def _extract_basic_item_renderer(item):

4373

# Modified from _extract_grid_item_renderer

4374

known_basic_renderers = (

4375

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

4376

)

4377

for key, renderer in item.items():

4378

if not isinstance(renderer, dict):

4379

continue

4380

elif key in known_basic_renderers:

4381

return renderer

4382

elif key.startswith('grid') and key.endswith('Renderer'):

4383

return renderer

4384

4385

def _extract_channel_renderer(self, renderer):
    """Build a `url` result pointing at the channel described by `renderer`.

    `renderer` must contain `channelId`; a KeyError is raised otherwise.
    """
    channel_id = renderer['channelId']
    channel_title = self._get_text(renderer, 'title')
    url = f'https://www.youtube.com/channel/{channel_id}'
    result = {
        '_type': 'url',
        'url': url,
        'id': channel_id,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': channel_title,
        'channel_id': channel_id,
        'channel_url': url,
        'title': channel_title,
    }
    # Remaining fields are read from the renderer through the shared helpers
    result['channel_follower_count'] = self._get_count(renderer, 'subscriberCountText')
    result['thumbnails'] = self._extract_thumbnails(renderer, 'thumbnail')
    result['playlist_count'] = self._get_count(renderer, 'videoCountText')
    result['description'] = self._get_text(renderer, 'descriptionSnippet')
    return result

4403

4404

def _grid_entries(self, grid_renderer):
    """Yield one entry for each supported item in `grid_renderer['items']`.

    Items are treated, in order of precedence, as playlist, video, channel,
    or as a generic endpoint URL handled by the first suitable extractor.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self._extract_channel_renderer(renderer)
        else:
            # generic endpoint URL support
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str))
            if not endpoint_url:
                continue
            extractor = next((
                ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)
                if ie.suitable(endpoint_url)), None)
            if extractor is not None:
                yield self.url_result(
                    endpoint_url, ie=extractor.ie_key(),
                    video_id=extractor._match_id(endpoint_url), video_title=title)

4441

4442

def _music_reponsive_list_entry(self, renderer):
    """Return a URL result for a music list item, or None if nothing matches.

    Checked in order: the item's own video, the playlist (optionally with a
    video) of its watch endpoint, and finally its browse endpoint.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, (*watch_endpoint, 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, (*watch_endpoint, 'videoId'))
        if video_id:
            target = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None

4459

4460

def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the grid entries embedded in the `content` of a shelf renderer."""
    content = shelf_renderer.get('content')
    if not isinstance(content, dict):
        return
    grid = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
    if grid:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        yield from self._grid_entries(grid)
    # TODO: content['horizontalListRenderer'] is not handled yet

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's endpoint followed by its inline entries.

    With `skip_channels` set, shelves whose URL contains '/channels?' yield
    nothing at all.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str))
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

4490

4491

def _playlist_entries(self, video_list_renderer):
    """Yield an extracted video for every playlist item that has a `videoId`.

    Items of `video_list_renderer['contents']` that are not dicts, or that
    carry neither a `playlistVideoRenderer` nor a `playlistPanelVideoRenderer`
    dict, are skipped.
    """
    for item in video_list_renderer['contents']:
        if not isinstance(item, dict):
            continue
        video = item.get('playlistVideoRenderer') or item.get('playlistPanelVideoRenderer')
        if isinstance(video, dict) and video.get('videoId'):
            yield self._extract_video(video)

4502

4503

def _rich_entries(self, rich_grid_renderer):
    """Yield the video held by a rich item (`videoRenderer` or `reelItemRenderer`)."""
    video = traverse_obj(
        rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
    if video.get('videoId'):
        yield self._extract_video(video)

4510

4511

def _video_entry(self, video_renderer):
    """Return the extracted video for `video_renderer`, or None without a `videoId`."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)

4515

4516

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for the page a hashtag tile links to, if it has a URL."""
    tile_url = urljoin('https://youtube.com', traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url')))
    if not tile_url:
        return None
    return self.url_result(
        tile_url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4522

4523

def _post_thread_entries(self, post_thread_renderer):
    """Yield the video, playlist and inline video links found in a post."""
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # Links to the attached video are handled by the attachment branch above
        if linked_id != attached_video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)

4558

4559

def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield the entries of a continuation page of posts.

    Each item of `post_thread_continuation['contents']` is handled either as
    a `backstagePostThreadRenderer` (delegated to `_post_thread_entries`) or
    as a plain `videoRenderer`. Items that are not dicts and videos for which
    no entry could be built are skipped, matching the other entry generators
    of this class.
    """
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        # Malformed items must not abort the whole page
        if not isinstance(content, dict):
            continue
        renderer = content.get('backstagePostThreadRenderer')
        if isinstance(renderer, dict):
            yield from self._post_thread_entries(renderer)
            continue
        renderer = content.get('videoRenderer')
        if isinstance(renderer, dict):
            # _video_entry() returns None when the renderer has no videoId
            entry = self._video_entry(renderer)
            if entry:
                yield entry

4571

4572

r''' # unused

4573

def _rich_grid_entries(self, contents):

4574

for content in contents:

4575

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4576

if video_renderer:

4577

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _report_history_entries(self, renderer):
    """Yield a YoutubeIE URL result for every link in a report history table."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, link_path):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)

4588

4589

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under `parent_renderer['contents']`.

    `continuation_list` is an output parameter: it is reset to `[None]` and
    its single element is updated in place with the continuation extracted
    while walking the renderers.
    """
    continuation_list[:] = [None]

    # Renderer key -> callable returning an iterable of entries
    entry_extractors = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            if content.get('richItemRenderer'):
                yield from self._rich_entries(content['richItemRenderer'])
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each item is processed
            for key, renderer in section_item.items():
                extractor = entry_extractors.get(key)
                if extractor is None:
                    continue
                yield from filter(None, extractor(renderer))
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4641

4642

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of `tab`, then keep requesting continuation pages.

    Paging stops when there is no continuation, when a request returns no
    response, or when a response holds no known renderer.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # `continuation_list` is looked up at call time, so the list rebound
        # inside the paging loop below is the one that gets filled in
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict)
        or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Key of the first continuation item -> (entry extractor, key the items are wrapped in)
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }
    response_keys = ('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=response_keys)
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        first_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in first_item:
            handler = known_renderers.get(key)
            if handler is None:
                continue
            func, parent_key = handler
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            # Start from a fresh list so a stale continuation is never reused
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        if not video_items_renderer:
            break

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4707

for tab_renderer in tabs:

4708

if tab_renderer.get('selected'):

4709

return tab_renderer

4710

if fatal:

4711

raise ExtractorError('Unable to find selected tab')

4712

4713

@staticmethod
def _extract_tab_renderers(response):
    """Return the (expandable) tab renderer dicts of a two-column browse response."""
    tab_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_path, expected_type=dict)

4717

4718

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Return a playlist result for the selected tab of `tabs`.

    The playlist title is the metadata title suffixed with the selected tab's
    `title` and `expandedText`, where present.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)
    selected_tab = self._extract_selected_tab(tabs)

    title_suffix = ''.join(
        format_field(selected_tab, field, ' - %s') for field in ('title', 'expandedText'))
    metadata['title'] += title_suffix

    entries = self._entries(
        selected_tab, metadata['id'], ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4731

4732

def _extract_metadata_from_tabs(self, item_id, data):
    """Collect channel/playlist level metadata from `data`.

    Returns an info dict holding `id`, the uploader and channel fields,
    title, availability, follower count, description, tags, thumbnails,
    modification date and view/playlist counts, as far as they are found.
    """
    info = {'id': item_id}

    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if metadata_renderer:
        external_id = metadata_renderer.get('externalId')
        info['uploader'] = metadata_renderer.get('title')
        info['uploader_id'] = external_id
        info['uploader_url'] = metadata_renderer.get('channelUrl')
        # The channel's external ID replaces the given item_id when available
        if external_id:
            info['id'] = external_id
    else:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer,
        ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info.update({
        'title': (traverse_obj(metadata_renderer, 'title')
                  or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
                  or info['id']),
        'availability': self._extract_availability(data),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_unix = self._parse_time_text(
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')

    view_count = self._get_count(playlist_stats, 1)
    if view_count is None:  # 0 is allowed
        view_count = self._get_count(playlist_header_renderer, 'viewCountText')
    info['view_count'] = view_count

    playlist_count = self._get_count(playlist_stats, 0)
    if playlist_count is None:  # 0 is allowed
        playlist_count = self._get_count(
            playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
    info['playlist_count'] = playlist_count

    if not info.get('uploader_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'uploader_id': browse_ep.get('browseId'),
            'uploader_url': urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))
        })

    # The channel fields mirror the uploader fields
    for suffix in ('', '_id', '_url'):
        info[f'channel{suffix}'] = info[f'uploader{suffix}']
    return info

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline playlist, page by page.

    Every further page is requested from the `next` endpoint, anchored on the
    last entry of the page before it. Iteration stops when a page has no
    entries, when it has nothing after the last entry already yielded, or
    when the response carries no playlist.
    @param playlist: renderer dict whose `contents` list holds the first page
    @param playlist_id: sent as `playlistId` and used in the progress message
    @param data: initial response, used for the account sync id and visitor data
    @param ytcfg: ytcfg handed on to the API helpers
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the entry the previous page ended on
        # (index 0 when that entry is not part of this page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last item may lack a watchEndpoint (e.g. it is not a
        # playlistPanelVideoRenderer). Fall back to an empty dict so that the
        # defaults below are used instead of raising AttributeError on None
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D',
        }
        response = self._extract_response(
            item_id=f'{playlist_id} page {page_num}',
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents')
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No playlist in the response: nothing further to page through
            return

4865

4866

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist embedded in a response.

    When the playlist links to a page other than `url` and its id is not one
    of the known unviewable ones, a url result for `YoutubeTabIE` is returned.
    Otherwise the entries are extracted inline.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, relative_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not should_delegate:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)

4888

4889

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns 'public' when a public badge, header privacy or selected privacy
             icon says so; otherwise the result of `self._availability`
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(sidebar_renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def resolve(badge_type, header_value, icon_value, fallback=None):
        # Priority: badge, then header privacy, then selected dropdown icon, then fallback.
        # The previous `badge or a == b if a is not None else ...` form parsed as
        # `(badge or a == b) if a is not None else ...`, which dropped the badge
        # whenever the header carried no privacy value.
        if self._has_badge(badges, badge_type):
            return True
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return fallback

    return self._availability(
        is_private=resolve(BadgeType.AVAILABILITY_PRIVATE, 'PRIVATE', 'PRIVACY_PRIVATE'),
        is_unlisted=resolve(
            BadgeType.AVAILABILITY_UNLISTED, 'UNLISTED', 'PRIVACY_UNLISTED',
            fallback=microformats_is_unlisted),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value of type `expected_type`
    among the playlist sidebar items of `data`, or None when there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    # Lazily evaluated, so lookup stops at the first match
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Returns None without making a request when `data` has neither playlist
    metadata nor a playlist header; the request itself is non-fatal.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not bool(playlist_marker):
        return None

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    api_headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)

    browse_query = {'params': 'wgYCCAA=', 'browseId': f'VL{item_id}'}
    return self._extract_response(
        item_id=item_id, headers=api_headers, query=browse_query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')

4958

4959

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the `skip` configuration argument of YoutubeTabIE"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps

4962

4963

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and parse its initial data, retrying where sensible.

    Network errors are retried, except HTTP 403 and 429, which are reported
    right away like any other extractor error. A page whose initial data
    lacks all of the expected top-level keys is retried as well.
    @returns tuple `(webpage, data)`; values that were never obtained stay None
    """
    webpage = data = None
    for attempt in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            if isinstance(cause, network_exceptions) and not (
                    isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429)):
                attempt.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            attempt.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):

4992

"""Use if failed to extract ytcfg (and data) from initial webpage"""

4993

if not ytcfg and self.is_authenticated:

4994

msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'

4995

if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:

4996

raise ExtractorError(

4997

f'{msg}. If you are not downloading private content, or '

4998

'your cookies are only for the first account and channel,'

4999

' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',

5000

expected=True)

5001

self.report_warning(msg, only_once=True)

5002

5003

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the initial data for `url`, from the webpage when possible and
    from the API (via `_extract_tab_endpoint`) when the webpage gave none.
    @returns tuple `(data, ytcfg)`
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)

        # Detect a redirect to the home page that was not explicitly requested:
        # raises when fatal, otherwise only a warning is reported
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

5021

5022

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the `navigation/resolve_url` endpoint and fetch the
    response of the browse or watch endpoint it points to.

    When neither endpoint is present in the resolve response, raises an
    ExtractorError if `fatal`, else reports a warning and returns None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolve response, API endpoint to query), in order of preference
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)

5039

5040

# Default `params` value for search requests; `_search_results` falls back to
# it when called without an explicit `params` argument (falsy means omit it)
_SEARCH_PARAMS = None

5041

5042

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search for `query`, following continuations
    until none is left.
    @param params: value for the request's `params` field; defaults to
                   `self._SEARCH_PARAMS` and is omitted when falsy
    @param default_client: client used for ytcfg, headers and API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Paths at which the result list may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({path[0] for path in content_keys})

    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-element list handed to _extract_entries and read back after each page
    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        data.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        page = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

5077

IE_DESC = 'YouTube Tabs'

5078

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

5087

(?P<not_channel>

5088

feed/|hashtag/|

5089

(?:playlist|watch)\?.*?\blist=

5090

)|

5091

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

5096

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

5097

}

5098

IE_NAME = 'youtube:tab'

5099

5100

_TESTS = [{

5101

'note': 'playlists, multipage',

5102

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

5103

'playlist_mincount': 94,

5104

'info_dict': {

5105

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5106

'title': 'Igor Kleiner - Playlists',

5107

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

5108

'uploader': 'Igor Kleiner',

5109

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5110

'channel': 'Igor Kleiner',

5111

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5112

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

5113

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5114

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5115

'channel_follower_count': int

5116

},

5117

}, {

5118

'note': 'playlists, multipage, different order',

5119

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

5120

'playlist_mincount': 94,

5121

'info_dict': {

5122

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5123

'title': 'Igor Kleiner - Playlists',

5124

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

5125

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5126

'uploader': 'Igor Kleiner',

5127

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5128

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

5129

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5130

'channel': 'Igor Kleiner',

5131

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5132

'channel_follower_count': int

5133

},

5134

}, {

5135

'note': 'playlists, series',

5136

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

5137

'playlist_mincount': 5,

5138

'info_dict': {

5139

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5140

'title': '3Blue1Brown - Playlists',

5141

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5142

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5143

'uploader': '3Blue1Brown',

5144

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5145

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5146

'channel': '3Blue1Brown',

5147

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5148

'tags': ['Mathematics'],

5149

'channel_follower_count': int

5150

},

5151

}, {

5152

'note': 'playlists, singlepage',

5153

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

5154

'playlist_mincount': 4,

5155

'info_dict': {

5156

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5157

'title': 'ThirstForScience - Playlists',

5158

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

5159

'uploader': 'ThirstForScience',

5160

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5161

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5162

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5163

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5164

'tags': 'count:13',

5165

'channel': 'ThirstForScience',

5166

'channel_follower_count': int

5167

}

5168

}, {

5169

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

5170

'only_matching': True,

5171

}, {

5172

'note': 'basic, single video playlist',

5173

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5174

'info_dict': {

5175

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5176

'uploader': 'Sergey M.',

5177

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5178

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

5183

'channel': 'Sergey M.',

5184

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5185

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5186

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5187

'availability': 'public',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

5192

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5193

'info_dict': {

5194

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5195

'uploader': 'Sergey M.',

5196

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5197

'title': 'youtube-dl empty playlist',

5198

'tags': [],

5199

'channel': 'Sergey M.',

5200

'description': '',

5201

'modified_date': '20160902',

5202

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5203

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5204

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5205

'availability': 'public',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

5211

'info_dict': {

5212

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5213

'title': 'lex will - Home',

5214

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5215

'uploader': 'lex will',

5216

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5217

'channel': 'lex will',

5218

'tags': ['bible', 'history', 'prophesy'],

5219

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5220

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5221

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5222

'channel_follower_count': int

5223

},

5224

'playlist_mincount': 2,

5225

}, {

5226

'note': 'Videos tab',

5227

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

5228

'info_dict': {

5229

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5230

'title': 'lex will - Videos',

5231

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5232

'uploader': 'lex will',

5233

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5234

'tags': ['bible', 'history', 'prophesy'],

5235

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5236

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5237

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5238

'channel': 'lex will',

5239

'channel_follower_count': int

5240

},

5241

'playlist_mincount': 975,

5242

}, {

5243

'note': 'Videos tab, sorted by popular',

5244

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

5245

'info_dict': {

5246

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5247

'title': 'lex will - Videos',

5248

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5249

'uploader': 'lex will',

5250

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5251

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5252

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5253

'channel': 'lex will',

5254

'tags': ['bible', 'history', 'prophesy'],

5255

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5256

'channel_follower_count': int

5257

},

5258

'playlist_mincount': 199,

5259

}, {

5260

'note': 'Playlists tab',

5261

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

5262

'info_dict': {

5263

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5264

'title': 'lex will - Playlists',

5265

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5266

'uploader': 'lex will',

5267

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5268

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5269

'channel': 'lex will',

5270

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5271

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5272

'tags': ['bible', 'history', 'prophesy'],

5273

'channel_follower_count': int

5274

},

5275

'playlist_mincount': 17,

5276

}, {

5277

'note': 'Community tab',

5278

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

5279

'info_dict': {

5280

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5281

'title': 'lex will - Community',

5282

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5283

'uploader': 'lex will',

5284

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5285

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5286

'channel': 'lex will',

5287

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5288

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5289

'tags': ['bible', 'history', 'prophesy'],

5290

'channel_follower_count': int

5291

},

5292

'playlist_mincount': 18,

5293

}, {

5294

'note': 'Channels tab',

5295

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

5296

'info_dict': {

5297

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5298

'title': 'lex will - Channels',

5299

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5300

'uploader': 'lex will',

5301

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5302

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5303

'channel': 'lex will',

5304

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5305

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5306

'tags': ['bible', 'history', 'prophesy'],

5307

'channel_follower_count': int

5308

},

5309

'playlist_mincount': 12,

5310

}, {

5311

'note': 'Search tab',

5312

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

5313

'playlist_mincount': 40,

5314

'info_dict': {

5315

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5316

'title': '3Blue1Brown - Search - linear algebra',

5317

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5318

'uploader': '3Blue1Brown',

5319

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5320

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5321

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5322

'tags': ['Mathematics'],

5323

'channel': '3Blue1Brown',

5324

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5325

'channel_follower_count': int

5326

},

5327

}, {

5328

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5329

'only_matching': True,

5330

}, {

5331

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5332

'only_matching': True,

5333

}, {

5334

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5335

'only_matching': True,

5336

}, {

5337

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

5338

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5339

'info_dict': {

5340

'title': '29C3: Not my department',

5341

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5342

'uploader': 'Christiaan008',

5343

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5344

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

5345

'tags': [],

5346

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5347

'view_count': int,

5348

'modified_date': '20150605',

5349

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5350

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5351

'channel': 'Christiaan008',

5352

'availability': 'public',

5353

},

5354

'playlist_count': 96,

5355

}, {

5356

'note': 'Large playlist',

5357

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

5358

'info_dict': {

5359

'title': 'Uploads from Cauchemar',

5360

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

5361

'uploader': 'Cauchemar',

5362

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5363

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

5364

'tags': [],

5365

'modified_date': r're:\d{8}',

5366

'channel': 'Cauchemar',

5367

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

5368

'view_count': int,

5369

'description': '',

5370

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5371

'availability': 'public',

5372

},

5373

'playlist_mincount': 1123,

5374

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5375

}, {

5376

'note': 'even larger playlist, 8832 videos',

5377

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

5378

'only_matching': True,

5379

}, {

5380

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

5381

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

5382

'info_dict': {

5383

'title': 'Uploads from Interstellar Movie',

5384

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

5385

'uploader': 'Interstellar Movie',

5386

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5387

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

5388

'tags': [],

5389

'view_count': int,

5390

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5391

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

5392

'channel': 'Interstellar Movie',

5393

'description': '',

5394

'modified_date': r're:\d{8}',

5395

'availability': 'public',

5396

},

5397

'playlist_mincount': 21,

5398

}, {

5399

'note': 'Playlist with "show unavailable videos" button',

5400

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

5401

'info_dict': {

5402

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

5403

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

5404

'uploader': 'Phim Siêu Nhân Nhật Bản',

5405

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5406

'view_count': int,

5407

'channel': 'Phim Siêu Nhân Nhật Bản',

5408

'tags': [],

5409

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5410

'description': '',

5411

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5412

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5413

'modified_date': r're:\d{8}',

5414

'availability': 'public',

5415

},

5416

'playlist_mincount': 200,

5417

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5418

}, {

5419

'note': 'Playlist with unavailable videos in page 7',

5420

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

5421

'info_dict': {

5422

'title': 'Uploads from BlankTV',

5423

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

5424

'uploader': 'BlankTV',

5425

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5426

'channel': 'BlankTV',

5427

'channel_url': 'https://www.youtube.com/c/blanktv',

5428

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5429

'view_count': int,

5430

'tags': [],

5431

'uploader_url': 'https://www.youtube.com/c/blanktv',

5432

'modified_date': r're:\d{8}',

5433

'description': '',

5434

'availability': 'public',

5435

},

5436

'playlist_mincount': 1000,

5437

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5438

}, {

5439

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

5440

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5441

'info_dict': {

5442

'title': 'Data Analysis with Dr Mike Pound',

5443

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5444

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5445

'uploader': 'Computerphile',

5446

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

5447

'uploader_url': 'https://www.youtube.com/user/Computerphile',

5448

'tags': [],

5449

'view_count': int,

5450

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5451

'channel_url': 'https://www.youtube.com/user/Computerphile',

5452

'channel': 'Computerphile',

5453

'availability': 'public',

5454

'modified_date': '20190712',

5455

},

5456

'playlist_mincount': 11,

5457

}, {

5458

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5459

'only_matching': True,

5460

}, {

5461

'note': 'Playlist URL that does not actually serve a playlist',

5462

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

5467

'uploader': 'STREEM',

5468

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

5469

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

5470

'upload_date': '20150526',

5471

'license': 'Standard YouTube License',

5472

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

5473

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5480

},

5481

'skip': 'This video is not available.',

5482

'add_ie': [YoutubeIE.ie_key()],

5483

}, {

5484

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5485

'only_matching': True,

5486

}, {

5487

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5488

'only_matching': True,

5489

}, {

5490

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5491

'info_dict': {

5492

'id': 'Wq15eF5vCbI', # This will keep changing

5493

'ext': 'mp4',

5494

'title': str,

5495

'uploader': 'Sky News',

5496

'uploader_id': 'skynews',

5497

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5498

'upload_date': r're:\d{8}',

5499

'description': str,

5500

'categories': ['News & Politics'],

5501

'tags': list,

5502

'like_count': int,

5503

'release_timestamp': int,

5504

'channel': 'Sky News',

5505

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5506

'age_limit': 0,

5507

'view_count': int,

5508

'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',

5509

'playable_in_embed': True,

5510

'release_date': r're:\d+',

5511

'availability': 'public',

5512

'live_status': 'is_live',

5513

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5514

'channel_follower_count': int,

5515

'concurrent_view_count': int,

5516

},

5517

'params': {

5518

'skip_download': True,

5519

},

5520

'expected_warnings': ['Ignoring subtitle tracks found in '],

5521

}, {

5522

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5527

'uploader': 'The Young Turks',

5528

'uploader_id': 'TheYoungTurks',

5529

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5530

'upload_date': '20150715',

5531

'license': 'Standard YouTube License',

5532

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5533

'categories': ['News & Politics'],

5534

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5539

},

5540

'only_matching': True,

5541

}, {

5542

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5543

'only_matching': True,

5544

}, {

5545

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5546

'only_matching': True,

5547

}, {

5548

'note': 'A channel that is not live. Should raise error',

5549

'url': 'https://www.youtube.com/user/numberphile/live',

5550

'only_matching': True,

5551

}, {

5552

'url': 'https://www.youtube.com/feed/trending',

5553

'only_matching': True,

5554

}, {

5555

'url': 'https://www.youtube.com/feed/library',

5556

'only_matching': True,

5557

}, {

5558

'url': 'https://www.youtube.com/feed/history',

5559

'only_matching': True,

5560

}, {

5561

'url': 'https://www.youtube.com/feed/subscriptions',

5562

'only_matching': True,

5563

}, {

5564

'url': 'https://www.youtube.com/feed/watch_later',

5565

'only_matching': True,

5566

}, {

5567

'note': 'Recommended - redirects to home page.',

5568

'url': 'https://www.youtube.com/feed/recommended',

5569

'only_matching': True,

5570

}, {

5571

'note': 'inline playlist with not always working continuations',

5572

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5573

'only_matching': True,

5574

}, {

5575

'url': 'https://www.youtube.com/course',

5576

'only_matching': True,

5577

}, {

5578

'url': 'https://www.youtube.com/zsecurity',

5579

'only_matching': True,

5580

}, {

5581

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5582

'only_matching': True,

5583

}, {

5584

'url': 'https://www.youtube.com/TheYoungTurks/live',

5585

'only_matching': True,

5586

}, {

5587

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 300, # not consistent but should be over 300

5594

}, {

5595

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5596

'only_matching': True,

5597

}, {

5598

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5599

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5600

'only_matching': True

5601

}, {

5602

'note': '/browse/ should redirect to /channel/',

5603

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5604

'only_matching': True

5605

}, {

5606

'note': 'VLPL, should redirect to playlist?list=PL...',

5607

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5608

'info_dict': {

5609

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5610

'uploader': 'NoCopyrightSounds',

5611

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5612

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5613

'title': 'NCS : All Releases 💿',

5614

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5615

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5616

'modified_date': r're:\d{8}',

5617

'view_count': int,

5618

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5619

'tags': [],

5620

'channel': 'NoCopyrightSounds',

5621

'availability': 'public',

5622

},

5623

'playlist_mincount': 166,

5624

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5625

}, {

5626

'note': 'Topic, should redirect to playlist?list=UU...',

5627

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5628

'info_dict': {

5629

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5630

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5631

'title': 'Uploads from Royalty Free Music - Topic',

5632

'uploader': 'Royalty Free Music - Topic',

5633

'tags': [],

5634

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5635

'channel': 'Royalty Free Music - Topic',

5636

'view_count': int,

5637

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5638

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5639

'modified_date': r're:\d{8}',

5640

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5641

'description': '',

5642

'availability': 'public',

5643

},

5644

'playlist_mincount': 101,

5645

}, {

5646

# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)

5647

# Treat as a general feed

5648

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5649

'info_dict': {

5650

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5651

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5652

'tags': [],

5653

},

5654

'playlist_mincount': 9,

5655

}, {

5656

'note': 'Youtube music Album',

5657

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5658

'info_dict': {

5659

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5660

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5665

'modified_date': r're:\d{8}',

5666

},

5667

'playlist_count': 50,

5668

}, {

5669

'note': 'unlisted single video playlist',

5670

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5671

'info_dict': {

5672

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5673

'uploader': 'colethedj',

5674

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5675

'title': 'yt-dlp unlisted playlist test',

5676

'availability': 'unlisted',

5677

'tags': [],

5678

'modified_date': '20220418',

5679

'channel': 'colethedj',

5680

'view_count': int,

5681

'description': '',

5682

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5683

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5684

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5689

'url': 'https://www.youtube.com/feed/recommended',

5690

'info_dict': {

5691

'id': 'recommended',

5692

'title': 'recommended',

5693

'tags': [],

5694

},

5695

'playlist_mincount': 50,

5696

'params': {

5697

'skip_download': True,

5698

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5699

},

5700

}, {

5701

'note': 'API Fallback: /videos tab, sorted by oldest first',

5702

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5703

'info_dict': {

5704

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5705

'title': 'Cody\'sLab - Videos',

5706

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5707

'uploader': 'Cody\'sLab',

5708

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5709

'channel': 'Cody\'sLab',

5710

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5711

'tags': [],

5712

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5713

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5714

'channel_follower_count': int

5715

},

5716

'playlist_mincount': 650,

5717

'params': {

5718

'skip_download': True,

5719

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5720

},

5721

'skip': 'Query for sorting no longer works',

5722

}, {

5723

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5724

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5725

'info_dict': {

5726

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5727

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5728

'title': 'Uploads from Royalty Free Music - Topic',

5729

'uploader': 'Royalty Free Music - Topic',

5730

'modified_date': r're:\d{8}',

5731

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5732

'description': '',

5733

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5734

'tags': [],

5735

'channel': 'Royalty Free Music - Topic',

5736

'view_count': int,

5737

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5738

'availability': 'public',

5739

},

5740

'playlist_mincount': 101,

5741

'params': {

5742

'skip_download': True,

5743

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5744

},

5745

}, {

5746

'note': 'non-standard redirect to regional channel',

5747

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5748

'only_matching': True

5749

}, {

5750

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5751

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5752

'info_dict': {

5753

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5754

'modified_date': '20220407',

5755

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5756

'tags': [],

5757

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5758

'uploader': 'pukkandan',

5759

'availability': 'unlisted',

5760

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5761

'channel': 'pukkandan',

5762

'description': 'Test for collaborative playlist',

5763

'title': 'yt-dlp test - collaborative playlist',

5764

'view_count': int,

5765

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5766

},

5767

'playlist_mincount': 2

5768

}, {

5769

'note': 'translated tab name',

5770

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',

5771

'info_dict': {

5772

'id': 'UCiu-3thuViMebBjw_5nWYrA',

5773

'tags': [],

5774

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5775

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5776

'description': 'test description',

5777

'title': 'cole-dlp-test-acc - 再生リスト',

5778

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5779

'uploader': 'cole-dlp-test-acc',

5780

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5781

'channel': 'cole-dlp-test-acc',

5782

},

5783

'playlist_mincount': 1,

5784

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5785

'expected_warnings': ['Preferring "ja"'],

5786

}, {

5787

# XXX: this should really check flat playlist entries, but the test suite doesn't support that

5788

'note': 'preferred lang set with playlist with translated video titles',

5789

'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5790

'info_dict': {

5791

'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5792

'tags': [],

5793

'view_count': int,

5794

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5795

'uploader': 'cole-dlp-test-acc',

5796

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5797

'channel': 'cole-dlp-test-acc',

5798

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5799

'description': 'test',

5800

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5801

'title': 'dlp test playlist',

5802

'availability': 'public',

5803

},

5804

'playlist_mincount': 1,

5805

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5806

'expected_warnings': ['Preferring "ja"'],

5807

}, {

5808

# shorts audio pivot for 2GtVksBMYFM.

5809

'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',

5810

'info_dict': {

5811

'id': 'sfv_audio_pivot',

5812

'title': 'sfv_audio_pivot',

5813

'tags': [],

5814

},

5815

'playlist_mincount': 50,

5816

5817

}, {

5818

# Channel with a real live tab (not to be mistaken with streams tab)

5819

# Do not treat like it should redirect to live stream

5820

'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',

5821

'info_dict': {

5822

'id': 'UCEH7P7kyJIkS_gJf93VYbmg',

5823

'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',

5824

'tags': [],

5825

},

5826

'playlist_mincount': 20,

5827

}, {

5828

# Tab name is not the same as tab id

5829

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',

5830

'info_dict': {

5831

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

5832

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',

5833

'tags': [],

5834

},

5835

'playlist_mincount': 8,

5836

}, {

5837

# Home tab id is literally home. Not to get mistaken with featured

5838

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',

5839

'info_dict': {

5840

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

5841

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',

5842

'tags': [],

5843

},

5844

'playlist_mincount': 8,

5845

}, {

5846

# Should get three playlists for videos, shorts and streams tabs

5847

'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5848

'info_dict': {

5849

'id': 'UCK9V2B22uJYu3N7eR_BT9QA',

5850

'title': 'Polka Ch. 尾丸ポルカ',

5851

'channel_follower_count': int,

5852

'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',

5853

'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5854

'uploader': 'Polka Ch. 尾丸ポルカ',

5855

'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9',

5856

'channel': 'Polka Ch. 尾丸ポルカ',

5857

'tags': 'count:35',

5858

'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5859

'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA',

},

'playlist_count': 3,

}, {

# Shorts tab with channel with handle

5864

'url': 'https://www.youtube.com/@NotJustBikes/shorts',

5865

'info_dict': {

5866

'id': 'UC0intLFzLaudFG-xAvUEO-A',

5867

'title': 'Not Just Bikes - Shorts',

5868

'tags': 'count:12',

5869

'uploader': 'Not Just Bikes',

5870

'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',

5871

'description': 'md5:7513148b1f02b924783157d84c4ea555',

5872

'channel_follower_count': int,

5873

'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',

5874

'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',

5875

'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',

5876

'channel': 'Not Just Bikes',

5877

},

5878

'playlist_mincount': 10,

5879

}, {

5880

# Streams tab

5881

'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',

5882

'info_dict': {

5883

'id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5884

'title': '中村悠一 - Live',

5885

'tags': 'count:7',

5886

'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5887

'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',

5888

'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5889

'channel': '中村悠一',

5890

'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',

5891

'channel_follower_count': int,

5892

'uploader': '中村悠一',

5893

'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',

5894

},

5895

'playlist_mincount': 60,

5896

}, {

5897

# Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.

5898

# See test_youtube_lists

5899

'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',

5900

'only_matching': True,

5901

}, {

5902

# No uploads and no UCID given. Should fail with no uploads error

5903

# See test_youtube_lists

5904

'url': 'https://www.youtube.com/news',

5905

'only_matching': True

5906

}, {

5907

# No videos tab but has a shorts tab

5908

'url': 'https://www.youtube.com/c/TKFShorts',

5909

'info_dict': {

5910

'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

5911

'title': 'Shorts Break - Shorts',

5912

'tags': 'count:32',

5913

'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

5914

'channel': 'Shorts Break',

5915

'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',

5916

'uploader': 'Shorts Break',

5917

'channel_follower_count': int,

5918

'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

5919

'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',

5920

'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',

5921

},

5922

'playlist_mincount': 30,

5923

}, {

5924

# Trending Now Tab. tab id is empty

5925

'url': 'https://www.youtube.com/feed/trending',

5926

'info_dict': {

5927

'id': 'trending',

5928

'title': 'trending - Now',

5929

'tags': [],

5930

},

5931

'playlist_mincount': 30,

5932

}, {

5933

# Trending Gaming Tab. tab id is empty

5934

'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',

5935

'info_dict': {

5936

'id': 'trending',

5937

'title': 'trending - Gaming',

5938

'tags': [],

5939

},

5940

'playlist_mincount': 30,

5941

}, {

5942

# Shorts url result in shorts tab

5943

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',

5944

'info_dict': {

5945

'id': 'UCiu-3thuViMebBjw_5nWYrA',

5946

'title': 'cole-dlp-test-acc - Shorts',

5947

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5948

'channel': 'cole-dlp-test-acc',

5949

'description': 'test description',

5950

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5951

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5952

'tags': [],

5953

'uploader': 'cole-dlp-test-acc',

5954

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'Youtube',

'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',

5962

'id': 'sSM9J5YH_60',

5963

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5964

'title': 'SHORT short',

5965

'channel': 'cole-dlp-test-acc',

5966

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

'view_count': int,

'thumbnails': list,

}

}],

'params': {'extract_flat': True},

5972

}, {

5973

# Live video status should be extracted

5974

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',

5975

'info_dict': {

5976

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

5977

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live

'tags': []

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'Youtube',

'url': 'startswith:https://www.youtube.com/watch?v=',

5985

'id': str,

5986

'title': str,

5987

'live_status': 'is_live',

5988

'channel_id': str,

5989

'channel_url': str,

5990

'concurrent_view_count': int,

'channel': str,

}

}],

'params': {'extract_flat': True, 'playlist_items': '1'},

5995

'playlist_mincount': 1

5996

}, {

5997

# Channel renderer metadata. Contains number of videos on the channel

5998

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',

5999

'info_dict': {

6000

'id': 'UCiu-3thuViMebBjw_5nWYrA',

6001

'title': 'cole-dlp-test-acc - Channels',

6002

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

6003

'channel': 'cole-dlp-test-acc',

6004

'description': 'test description',

6005

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

6006

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

6007

'tags': [],

6008

'uploader': 'cole-dlp-test-acc',

6009

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'YoutubeTab',

6016

'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6017

'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6018

'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6019

'title': 'PewDiePie',

6020

'channel': 'PewDiePie',

6021

'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',

6022

'thumbnails': list,

6023

'channel_follower_count': int,

6024

'playlist_count': int

6025

}

6026

}],

6027

'params': {'extract_flat': True},

}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs that YoutubeIE already accepts are always declined here; for every
    other URL the decision is delegated to the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

6033

6034

# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - one optional "/segment" (no "?", "#" or further "/"); the regex
#          conditional only tries to match it when the `not_channel` group
#          (expected to be defined inside _VALID_URL) did not participate
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')

6035

6036

def _get_url_mobj(self, url):

6037

mobj = self._URL_RE.match(url).groupdict()

6038

mobj.update((k, '') for k, v in mobj.items() if v is None)

6039

return mobj

6040

6041

def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return an ``(identifier, lowercased title)`` pair for a tab renderer.

    The identifier is taken, in order of preference, from the tab segment of
    the renderer's endpoint URL (resolved against *base_url*), from its
    ``tabIdentifier`` field, and finally from the lowercased title itself.
    """
    tab_name = (tab.get('title') or '').lower()
    endpoint_url = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_url)

    tab_id = None
    if tab_url:
        # Drop the leading "/" of the matched tab segment
        tab_id = self._get_url_mobj(tab_url)['tab'][1:]
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if tab_id:
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(tab_name, tab_name), tab_name

6062

6063

def _has_tab(self, tabs, tab_id):

6064

return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)

6065

6066

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a channel tab, feed, playlist or watch URL into a result.

    Depending on the page this returns a ``url_result`` pointing at another
    YoutubeTabIE URL or at YoutubeIE, the result built from the page's tabs,
    a playlist wrapping one result per tab, or an inline playlist result.

    Raises ExtractorError when the URL cannot be handled, and UserNotLive
    when a ``live`` tab was requested but a different tab is selected.
    """
    item_id = self._match_id(url)
    # Always talk to www.youtube.com, whatever host the URL was given with
    parsed_url = urllib.parse.urlparse(url)
    url = urllib.parse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    url_parts = self._get_url_mobj(url)
    pre, tab, post = url_parts['pre'], url_parts['tab'], url_parts['post']
    is_channel = not url_parts['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            music_playlist_id = item_id[2:]
            return self.url_result(
                f'https://music.youtube.com/playlist?list={music_playlist_id}',
                YoutubeTabIE, music_playlist_id)
        if id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            album_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if album_url:
                return self.url_result(album_url, YoutubeTabIE)
            raise ExtractorError('Failed to resolve album to playlist')
        if url_parts['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    original_tab_id = tab[1:]
    display_id = f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        url = f'{pre}/videos{post}'

    # Handle both video/playlist URLs
    query = parse_qs(url)
    video_id = traverse_obj(query, ('v', 0))
    playlist_id = traverse_obj(query, ('list', 0))
    if not video_id and url_parts['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_path = traverse_obj(
        data,
        ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        get_all=False)
    if redirect_path and 'no-youtube-channel-redirect' not in compat_opts:
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_path), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        # NB: Name may be translated
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            for extra_tab_id in ('streams', 'shorts'):
                if self._has_tab(tabs, extra_tab_id):
                    extra_tabs.append(f'{pre}/{extra_tab_id}{post}')
            # XXX: Members-only tab should also be extracted

            if selected_tab_id != 'videos':
                if extra_tabs:
                    # When there are shorts/live tabs but not videos tab
                    url, data = f'{pre}{post}', None
                else:
                    # Channel does not have streams, shorts or videos tabs
                    if item_id[:2] != 'UC':
                        raise ExtractorError('This channel has no uploads', expected=True)

                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        raise ExtractorError('This channel has no uploads', expected=True)
                    # Only reached when the uploads playlist could be loaded
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            if selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        tab_result = self._extract_from_tabs(item_id, ytcfg, data, tabs)
        tab_result.update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
        entries.append(tab_result)

    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(tab_url, YoutubeTabIE) for tab_url in extra_tabs)
    else:
        # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(self._real_extract(tab_url) for tab_url in extra_tabs)

    if len(entries) == 1:
        return entries[0]
    if entries:
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda meta: meta['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    inline_playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if inline_playlist:
        return self._extract_from_playlist(item_id, url, data, inline_playlist, ytcfg)

    current_video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
    video_id = current_video_id or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')

6215

6216

6217

class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids as well as youtube / youtubekids / invidious
    # URLs carrying a `list=` query parameter, and hands them to YoutubeTabIE.
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/c/WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/c/WickmanVT',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        URLs claimed by YoutubeTabIE, and URLs carrying a non-empty ``v``
        query parameter, are declined; anything else is decided by the
        parent class.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is imported inside the method in the original;
        # presumably deliberate, so the local import is kept as-is.
        from ..utils import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        return False if video_ids[0] else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* as a www.youtube.com/playlist URL and delegate to YoutubeTabIE.

        The original query string is carried over; when there is none, a
        ``list`` parameter is built from the matched playlist id. Music URLs
        are flagged through smuggled data.
        """
        playlist_id = self._match_id(url)
        # Must be decided before `url` is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6329

6330

6331

class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` playlist id."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and hand it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6380

6381

6382

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=...`` embeds to the channel's ``/live`` URL."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel id taken from the embed URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

6395

6396

6397

class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to the matching ``/user/<name>`` URL."""

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user page URL."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, ie=YoutubeTabIE, video_id=user_id)

6409

6410

6411

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` keyword family to the ``LL`` playlist URL."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no further data."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

6428

6429

6430

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Expose the notification menu (``:ytnotif`` keyword) as a playlist of URL entries."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield the entries found in one notification-menu *response*.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last non-empty
        ``continuationItemRenderer`` seen among the items.
        """
        # Item list location in the first response vs. in continuation responses
        first_page_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0,
            'multiPageMenuNotificationSectionRenderer', 'items')
        next_page_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')

        items = traverse_obj(response, first_page_path, next_page_path, expected_type=list) or []
        continuation_list[0] = None
        for menu_item in items:
            result = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if result:
                yield result
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert one ``notificationRenderer`` into a ``url`` result dict.

        A notification with a watch endpoint becomes a video URL handled by
        YoutubeIE. Otherwise a community post URL handled by YoutubeTabIE is
        built from the browse endpoint; None is returned when that endpoint
        yields no channel id or no post id.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            target_url = f'https://www.youtube.com/watch?v={video_id}'
            target_ie = YoutubeIE
        else:
            browse_endpoint = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_endpoint, 'browseId', expected_type=str)
            base_url = traverse_obj(browse_endpoint, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(r'/post/(.+)', base_url, 'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            target_url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'
            target_ie = YoutubeTabIE

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        message = self._get_text(notification, 'shortMessage')
        if message:
            # remove soft hyphens
            message = message.replace('\xad', '')
        # TODO: handle recommended videos
        title_pattern = re.escape(channel or "") + r'[^:]+: (.+)'
        title = self._search_regex(title_pattern, message, 'video title', default=None)

        # The timestamp is only computed when the approximate_date argument is set
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        return {
            '_type': 'url',
            'url': target_url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from every notification-menu page.

        Pages are requested one after another until a page leaves no
        continuation behind.
        """
        continuation_list = [None]
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            # Visitor data is taken from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return the notifications as a playlist with id and title 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)

6519

6520

6521

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle ``/results`` and ``/search`` URLs that carry a search query."""

    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'playlist_count': int,  # XXX: should have a way of saying > 1
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list
            }
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search described by the URL and return it as a playlist.

        The query comes from ``search_query`` (or ``q``); the optional ``sp``
        parameter is passed on to ``_search_results``. The query string is
        used as both playlist id and title.
        """
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

6614

6615

6616

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs, optionally limited to one section."""

    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (lower case) -> value of the ``sp`` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run the music search described by the URL and return it as a playlist.

        An explicit ``sp`` parameter wins and is mapped back to a section name
        when it matches a ``_SECTIONS`` value (the raw value is used as the
        name otherwise). Without ``sp``, the URL fragment is looked up in
        ``_SECTIONS``; an unknown fragment means no section. The playlist id
        and title are the query and section name joined by ' - '.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]

        if params:
            section = params
            for name, token in self._SECTIONS.items():
                if token == params:
                    section = name
                    break
        else:
            url_parts = url.split('#')
            fragment = url_parts[1] if len(url_parts) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

6667

6668

6669

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    Each subclass has to override _FEED_NAME; it is used to build both the
    extractor name and the ``/feed/<name>`` URL.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        """Run the login check implemented on YoutubeBaseInfoExtractor."""
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        """Extractor name in the form ``youtube:<feed name>``."""
        return f'youtube:{cls._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the URL of this feed."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

6687

6688

6689

class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` keyword to the ``WL`` playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no further data."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

6701

6702

6703

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed; also matches the bare site URL."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed (``:ytsubs`` keyword family)."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed (``:ythis`` / ``:ythistory`` keywords)."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeStoriesIE(InfoExtractor):
    """Resolve ``ytstories:<channel id>`` to a playlist URL derived from the channel id."""

    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE, smuggling an ``is_story`` flag with the URL."""
        # The matched id excludes the leading "UC"; the playlist id is "RLTD" + that id
        channel_suffix = self._match_id(url)
        playlist_id = f'RLTD{channel_suffix}'
        stories_url = smuggle_url(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True})
        return self.url_result(stories_url, ie=YoutubeTabIE, video_id=playlist_id)

6757

6758

6759

class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Map ``/source/<video id>/shorts`` URLs to the ``sfv_audio_pivot`` feed."""

    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    IE_NAME = 'youtube:shorts:pivot:audio'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the sfv_audio_pivot browse params for *video_id*.

        The encoded id is inserted three times into a fixed byte template,
        which is then base64-encoded and URL-quoted.
        """
        raw_id = video_id.encode()
        payload = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (raw_id, raw_id, raw_id)
        encoded = base64.b64encode(payload).decode()
        return urllib.parse.quote(encoded)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the feed URL built for the matched video id."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)

class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that have no video id and explain the likely cause.

    The pattern only matches when the URL ends right after one of the listed
    parameters (or right after ``watch?``), which is what remains when an
    unquoted ``&`` cuts the URL short on the command line.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a hint about shell quoting.

        Raises:
            ExtractorError: unconditionally; nothing can be extracted from
                such a URL.
        """
        # The hint names this project's own executable rather than youtube-dl
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Resolve a ``/clip/<id>`` URL to its base video plus the clip's time range."""

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the clip's base video.

        The clip id is kept as the result id, and the clip boundaries are
        returned as ``section_start`` / ``section_end`` in seconds.

        Raises:
            ExtractorError: if the page data contains no video id, or no
                usable start/end time for the clip.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # clip_data is None when the renderer path is absent; fail with a
        # proper extractor error instead of a TypeError/KeyError on subscripting
        start_ms = int_or_none(traverse_obj(clip_data, 'startTimeMs'))
        end_ms = int_or_none(traverse_obj(clip_data, 'endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start/end time')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Times are given in milliseconds
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1 to 10 characters long."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id and the URL."""
        short_id = self._match_id(url)
        message = f'Incomplete YouTube ID {short_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)