jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import enum
	6	import hashlib
	7	import itertools
	8	import json
	9	import math
	10	import os.path
	11	import random
	12	import re
	13	import sys
	14	import threading
	15	import time
	16	import traceback
	17	import urllib.error
	18	import urllib.parse
	19
	20	from .common import InfoExtractor, SearchInfoExtractor
	21	from .openload import PhantomJSwrapper
	22	from ..compat import functools
	23	from ..jsinterp import JSInterpreter
	24	from ..utils import (
	25	NO_DEFAULT,
	26	ExtractorError,
	27	LazyList,
	28	UserNotLive,
	29	bug_reports_message,
	30	classproperty,
	31	clean_html,
	32	datetime_from_str,
	33	dict_get,
	34	filter_dict,
	35	float_or_none,
	36	format_field,
	37	get_first,
	38	int_or_none,
	39	is_html,
	40	join_nonempty,
	41	js_to_json,
	42	mimetype2ext,
	43	network_exceptions,
	44	orderedSet,
	45	parse_codecs,
	46	parse_count,
	47	parse_duration,
	48	parse_iso8601,
	49	parse_qs,
	50	qualities,
	51	remove_start,
	52	smuggle_url,
	53	str_or_none,
	54	str_to_int,
	55	strftime_or_none,
	56	traverse_obj,
	57	try_get,
	58	unescapeHTML,
	59	unified_strdate,
	60	unified_timestamp,
	61	unsmuggle_url,
	62	update_url_query,
	63	url_or_none,
	64	urljoin,
	65	variadic,
	66	)
	67
	68	# any clients starting with _ cannot be explicitly requested by the user
	69	INNERTUBE_CLIENTS = {
	70	'web': {
	71	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	72	'INNERTUBE_CONTEXT': {
	73	'client': {
	74	'clientName': 'WEB',
	75	'clientVersion': '2.20220801.00.00',
	76	}
	77	},
	78	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	79	},
	80	'web_embedded': {
	81	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	82	'INNERTUBE_CONTEXT': {
	83	'client': {
	84	'clientName': 'WEB_EMBEDDED_PLAYER',
	85	'clientVersion': '1.20220731.00.00',
	86	},
	87	},
	88	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	89	},
	90	'web_music': {
	91	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	92	'INNERTUBE_HOST': 'music.youtube.com',
	93	'INNERTUBE_CONTEXT': {
	94	'client': {
	95	'clientName': 'WEB_REMIX',
	96	'clientVersion': '1.20220727.01.00',
	97	}
	98	},
	99	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	100	},
	101	'web_creator': {
	102	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	103	'INNERTUBE_CONTEXT': {
	104	'client': {
	105	'clientName': 'WEB_CREATOR',
	106	'clientVersion': '1.20220726.00.00',
	107	}
	108	},
	109	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	110	},
	111	'android': {
	112	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	113	'INNERTUBE_CONTEXT': {
	114	'client': {
	115	'clientName': 'ANDROID',
	116	'clientVersion': '17.31.35',
	117	'androidSdkVersion': 30,
	118	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	119	}
	120	},
	121	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	122	'REQUIRE_JS_PLAYER': False
	123	},
	124	'android_embedded': {
	125	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	126	'INNERTUBE_CONTEXT': {
	127	'client': {
	128	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	129	'clientVersion': '17.31.35',
	130	'androidSdkVersion': 30,
	131	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	132	},
	133	},
	134	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	135	'REQUIRE_JS_PLAYER': False
	136	},
	137	'android_music': {
	138	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	139	'INNERTUBE_CONTEXT': {
	140	'client': {
	141	'clientName': 'ANDROID_MUSIC',
	142	'clientVersion': '5.16.51',
	143	'androidSdkVersion': 30,
	144	'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
	145	}
	146	},
	147	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	148	'REQUIRE_JS_PLAYER': False
	149	},
	150	'android_creator': {
	151	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	152	'INNERTUBE_CONTEXT': {
	153	'client': {
	154	'clientName': 'ANDROID_CREATOR',
	155	'clientVersion': '22.30.100',
	156	'androidSdkVersion': 30,
	157	'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
	158	},
	159	},
	160	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	161	'REQUIRE_JS_PLAYER': False
	162	},
	163	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	164	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	165	'ios': {
	166	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	167	'INNERTUBE_CONTEXT': {
	168	'client': {
	169	'clientName': 'IOS',
	170	'clientVersion': '17.33.2',
	171	'deviceModel': 'iPhone14,3',
	172	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	173	}
	174	},
	175	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	176	'REQUIRE_JS_PLAYER': False
	177	},
	178	'ios_embedded': {
	179	'INNERTUBE_CONTEXT': {
	180	'client': {
	181	'clientName': 'IOS_MESSAGES_EXTENSION',
	182	'clientVersion': '17.33.2',
	183	'deviceModel': 'iPhone14,3',
	184	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	185	},
	186	},
	187	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	188	'REQUIRE_JS_PLAYER': False
	189	},
	190	'ios_music': {
	191	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	192	'INNERTUBE_CONTEXT': {
	193	'client': {
	194	'clientName': 'IOS_MUSIC',
	195	'clientVersion': '5.21',
	196	'deviceModel': 'iPhone14,3',
	197	'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	198	},
	199	},
	200	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	201	'REQUIRE_JS_PLAYER': False
	202	},
	203	'ios_creator': {
	204	'INNERTUBE_CONTEXT': {
	205	'client': {
	206	'clientName': 'IOS_CREATOR',
	207	'clientVersion': '22.33.101',
	208	'deviceModel': 'iPhone14,3',
	209	'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	210	},
	211	},
	212	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	213	'REQUIRE_JS_PLAYER': False
	214	},
	215	# mweb has 'ultralow' formats
	216	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	217	'mweb': {
	218	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	219	'INNERTUBE_CONTEXT': {
	220	'client': {
	221	'clientName': 'MWEB',
	222	'clientVersion': '2.20220801.00.00',
	223	}
	224	},
	225	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	226	},
	227	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	228	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	229	'tv_embedded': {
	230	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	231	'INNERTUBE_CONTEXT': {
	232	'client': {
	233	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	234	'clientVersion': '2.0',
	235	},
	236	},
	237	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	238	},
	239	}
	240
	241
	242	def _split_innertube_client(client_name):
	243	variant, *base = client_name.rsplit('.', 1)
	244	if base:
	245	return variant, base[0], variant
	246	base, *variant = client_name.split('_', 1)
	247	return client_name, base, variant[0] if variant else None
	248
	249
	250	def build_innertube_clients():
	251	THIRD_PARTY = {
	252	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	253	}
	254	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	255	priority = qualities(BASE_CLIENTS[::-1])
	256
	257	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	258	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	259	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	260	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	261	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	262
	263	_, base_client, variant = _split_innertube_client(client)
	264	ytcfg['priority'] = 10 * priority(base_client)
	265
	266	if not variant:
	267	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	268	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	269	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	270	embedscreen['priority'] -= 3
	271	elif variant == 'embedded':
	272	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	273	ytcfg['priority'] -= 2
	274	else:
	275	ytcfg['priority'] -= 3
	276
	277
	278	build_innertube_clients()
	279
	280
	281	class BadgeType(enum.Enum):
	282	AVAILABILITY_UNLISTED = enum.auto()
	283	AVAILABILITY_PRIVATE = enum.auto()
	284	AVAILABILITY_PUBLIC = enum.auto()
	285	AVAILABILITY_PREMIUM = enum.auto()
	286	AVAILABILITY_SUBSCRIPTION = enum.auto()
	287	LIVE_NOW = enum.auto()
	288
	289
	290	class YoutubeBaseInfoExtractor(InfoExtractor):
	291	"""Provide base functions for Youtube extractors"""
	292
	293	_RESERVED_NAMES = (
	294	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	295	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	296	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|source\|'
	297	r'storefront\|oops\|index\|account\|t/terms\|about\|upload\|signin\|logout')
	298
	299	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	300
	301	# _NETRC_MACHINE = 'youtube'
	302
	303	# If True it will raise an error if no login info is provided
	304	_LOGIN_REQUIRED = False
	305
	306	_INVIDIOUS_SITES = (
	307	# invidious-redirect websites
	308	r'(?:www\.)?redirect\.invidious\.io',
	309	r'(?:(?:www\|dev)\.)?invidio\.us',
	310	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	311	r'(?:www\.)?invidious\.pussthecat\.org',
	312	r'(?:www\.)?invidious\.zee\.li',
	313	r'(?:www\.)?invidious\.ethibox\.fr',
	314	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	315	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	316	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	317	# youtube-dl invidious instances list
	318	r'(?:(?:www\|no)\.)?invidiou\.sh',
	319	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	320	r'(?:www\.)?invidious\.kabi\.tk',
	321	r'(?:www\.)?invidious\.mastodon\.host',
	322	r'(?:www\.)?invidious\.zapashcanon\.fr',
	323	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	324	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	325	r'(?:www\.)?invidious\.himiko\.cloud',
	326	r'(?:www\.)?invidious\.reallyancient\.tech',
	327	r'(?:www\.)?invidious\.tube',
	328	r'(?:www\.)?invidiou\.site',
	329	r'(?:www\.)?invidious\.site',
	330	r'(?:www\.)?invidious\.xyz',
	331	r'(?:www\.)?invidious\.nixnet\.xyz',
	332	r'(?:www\.)?invidious\.048596\.xyz',
	333	r'(?:www\.)?invidious\.drycat\.fr',
	334	r'(?:www\.)?inv\.skyn3t\.in',
	335	r'(?:www\.)?tube\.poal\.co',
	336	r'(?:www\.)?tube\.connect\.cafe',
	337	r'(?:www\.)?vid\.wxzm\.sx',
	338	r'(?:www\.)?vid\.mint\.lgbt',
	339	r'(?:www\.)?vid\.puffyan\.us',
	340	r'(?:www\.)?yewtu\.be',
	341	r'(?:www\.)?yt\.elukerio\.org',
	342	r'(?:www\.)?yt\.lelux\.fi',
	343	r'(?:www\.)?invidious\.ggc-project\.de',
	344	r'(?:www\.)?yt\.maisputain\.ovh',
	345	r'(?:www\.)?ytprivate\.com',
	346	r'(?:www\.)?invidious\.13ad\.de',
	347	r'(?:www\.)?invidious\.toot\.koeln',
	348	r'(?:www\.)?invidious\.fdn\.fr',
	349	r'(?:www\.)?watch\.nettohikari\.com',
	350	r'(?:www\.)?invidious\.namazso\.eu',
	351	r'(?:www\.)?invidious\.silkky\.cloud',
	352	r'(?:www\.)?invidious\.exonip\.de',
	353	r'(?:www\.)?invidious\.riverside\.rocks',
	354	r'(?:www\.)?invidious\.blamefran\.net',
	355	r'(?:www\.)?invidious\.moomoo\.de',
	356	r'(?:www\.)?ytb\.trom\.tf',
	357	r'(?:www\.)?yt\.cyberhost\.uk',
	358	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	359	r'(?:www\.)?qklhadlycap4cnod\.onion',
	360	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	361	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	362	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	363	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	364	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	365	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	366	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	367	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	368	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	369	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	370	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	371	r'(?:www\.)?piped\.kavin\.rocks',
	372	r'(?:www\.)?piped\.silkky\.cloud',
	373	r'(?:www\.)?piped\.tokhmi\.xyz',
	374	r'(?:www\.)?piped\.moomoo\.me',
	375	r'(?:www\.)?il\.ax',
	376	r'(?:www\.)?piped\.syncpundit\.com',
	377	r'(?:www\.)?piped\.mha\.fi',
	378	r'(?:www\.)?piped\.mint\.lgbt',
	379	r'(?:www\.)?piped\.privacy\.com\.de',
	380	)
	381
	382	# extracted from account/account_menu ep
	383	# XXX: These are the supported YouTube UI and API languages,
	384	# which is slightly different from languages supported for translation in YouTube studio
	385	_SUPPORTED_LANG_CODES = [
	386	'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
	387	'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
	388	'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
	389	'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
	390	'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
	391	'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
	392	]
	393
	394	_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
	395
	396	@functools.cached_property
	397	def _preferred_lang(self):
	398	"""
	399	Returns a language code supported by YouTube for the user preferred language.
	400	Returns None if no preferred language set.
	401	"""
	402	preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
	403	if not preferred_lang:
	404	return
	405	if preferred_lang not in self._SUPPORTED_LANG_CODES:
	406	raise ExtractorError(
	407	f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
	408	expected=True)
	409	elif preferred_lang != 'en':
	410	self.report_warning(
	411	f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
	412	return preferred_lang
	413
	414	def _initialize_consent(self):
	415	cookies = self._get_cookies('https://www.youtube.com/')
	416	if cookies.get('__Secure-3PSID'):
	417	return
	418	consent_id = None
	419	consent = cookies.get('CONSENT')
	420	if consent:
	421	if 'YES' in consent.value:
	422	return
	423	consent_id = self._search_regex(
	424	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	425	if not consent_id:
	426	consent_id = random.randint(100, 999)
	427	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	428
	429	def _initialize_pref(self):
	430	cookies = self._get_cookies('https://www.youtube.com/')
	431	pref_cookie = cookies.get('PREF')
	432	pref = {}
	433	if pref_cookie:
	434	try:
	435	pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
	436	except ValueError:
	437	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	438	pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
	439	self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
	440
	441	def _real_initialize(self):
	442	self._initialize_pref()
	443	self._initialize_consent()
	444	self._check_login_required()
	445
	446	def _check_login_required(self):
	447	if self._LOGIN_REQUIRED and not self._cookies_passed:
	448	self.raise_login_required('Login details are needed to download this content', method='cookies')
	449
	450	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	451	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	452
	453	def _get_default_ytcfg(self, client='web'):
	454	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	455
	456	def _get_innertube_host(self, client='web'):
	457	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	458
	459	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	460	# try_get but with fallback to default ytcfg client values when present
	461	_func = lambda y: try_get(y, getter, expected_type)
	462	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	463
	464	def _extract_client_name(self, ytcfg, default_client='web'):
	465	return self._ytcfg_get_safe(
	466	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	467	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
	468
	469	def _extract_client_version(self, ytcfg, default_client='web'):
	470	return self._ytcfg_get_safe(
	471	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	472	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
	473
	474	def _select_api_hostname(self, req_api_hostname, default_client=None):
	475	return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
	476	or req_api_hostname or self._get_innertube_host(default_client or 'web'))
	477
	478	def _extract_api_key(self, ytcfg=None, default_client='web'):
	479	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
	480
	481	def _extract_context(self, ytcfg=None, default_client='web'):
	482	context = get_first(
	483	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	484	# Enforce language and tz for extraction
	485	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	486	client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	487	return context
	488
	489	_SAPISID = None
	490
	491	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	492	time_now = round(time.time())
	493	if self._SAPISID is None:
	494	yt_cookies = self._get_cookies('https://www.youtube.com')
	495	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	496	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	497	sapisid_cookie = dict_get(
	498	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	499	if sapisid_cookie and sapisid_cookie.value:
	500	self._SAPISID = sapisid_cookie.value

1

import base64

import calendar

import copy

import datetime

import enum

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

21

from .openload import PhantomJSwrapper

22

from ..compat import functools

23

from ..jsinterp import JSInterpreter

24

from ..utils import (

NO_DEFAULT,

ExtractorError,

LazyList,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

filter_dict,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

69

INNERTUBE_CLIENTS = {

70

'web': {

71

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

72

'INNERTUBE_CONTEXT': {

73

'client': {

74

'clientName': 'WEB',

75

'clientVersion': '2.20220801.00.00',

76

}

77

},

78

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

79

},

80

'web_embedded': {

81

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

82

'INNERTUBE_CONTEXT': {

83

'client': {

84

'clientName': 'WEB_EMBEDDED_PLAYER',

85

'clientVersion': '1.20220731.00.00',

86

},

87

},

88

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

89

},

90

'web_music': {

91

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

92

'INNERTUBE_HOST': 'music.youtube.com',

93

'INNERTUBE_CONTEXT': {

94

'client': {

95

'clientName': 'WEB_REMIX',

96

'clientVersion': '1.20220727.01.00',

97

}

98

},

99

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

100

},

101

'web_creator': {

102

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

103

'INNERTUBE_CONTEXT': {

104

'client': {

105

'clientName': 'WEB_CREATOR',

106

'clientVersion': '1.20220726.00.00',

107

}

108

},

109

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

110

},

111

'android': {

112

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

113

'INNERTUBE_CONTEXT': {

114

'client': {

115

'clientName': 'ANDROID',

116

'clientVersion': '17.31.35',

117

'androidSdkVersion': 30,

118

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

119

}

120

},

121

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

122

'REQUIRE_JS_PLAYER': False

123

},

124

'android_embedded': {

125

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

126

'INNERTUBE_CONTEXT': {

127

'client': {

128

'clientName': 'ANDROID_EMBEDDED_PLAYER',

129

'clientVersion': '17.31.35',

130

'androidSdkVersion': 30,

131

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

132

},

133

},

134

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

135

'REQUIRE_JS_PLAYER': False

136

},

137

'android_music': {

138

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

139

'INNERTUBE_CONTEXT': {

140

'client': {

141

'clientName': 'ANDROID_MUSIC',

142

'clientVersion': '5.16.51',

143

'androidSdkVersion': 30,

144

'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'

145

}

146

},

147

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

148

'REQUIRE_JS_PLAYER': False

149

},

150

'android_creator': {

151

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

152

'INNERTUBE_CONTEXT': {

153

'client': {

154

'clientName': 'ANDROID_CREATOR',

155

'clientVersion': '22.30.100',

156

'androidSdkVersion': 30,

157

'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'

158

},

159

},

160

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

161

'REQUIRE_JS_PLAYER': False

162

},

163

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

164

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

165

'ios': {

166

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

167

'INNERTUBE_CONTEXT': {

168

'client': {

169

'clientName': 'IOS',

170

'clientVersion': '17.33.2',

171

'deviceModel': 'iPhone14,3',

172

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

173

}

174

},

175

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

176

'REQUIRE_JS_PLAYER': False

177

},

178

'ios_embedded': {

179

'INNERTUBE_CONTEXT': {

180

'client': {

181

'clientName': 'IOS_MESSAGES_EXTENSION',

182

'clientVersion': '17.33.2',

183

'deviceModel': 'iPhone14,3',

184

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

185

},

186

},

187

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

188

'REQUIRE_JS_PLAYER': False

189

},

190

'ios_music': {

191

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

192

'INNERTUBE_CONTEXT': {

193

'client': {

194

'clientName': 'IOS_MUSIC',

195

'clientVersion': '5.21',

196

'deviceModel': 'iPhone14,3',

197

'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

198

},

199

},

200

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

201

'REQUIRE_JS_PLAYER': False

202

},

203

'ios_creator': {

204

'INNERTUBE_CONTEXT': {

205

'client': {

206

'clientName': 'IOS_CREATOR',

207

'clientVersion': '22.33.101',

208

'deviceModel': 'iPhone14,3',

209

'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

210

},

211

},

212

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

213

'REQUIRE_JS_PLAYER': False

214

},

215

# mweb has 'ultralow' formats

216

# See: https://github.com/yt-dlp/yt-dlp/pull/557

217

'mweb': {

218

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

219

'INNERTUBE_CONTEXT': {

220

'client': {

221

'clientName': 'MWEB',

222

'clientVersion': '2.20220801.00.00',

223

}

224

},

225

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

226

},

227

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

228

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

229

'tv_embedded': {

230

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

231

'INNERTUBE_CONTEXT': {

232

'client': {

233

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

234

'clientVersion': '2.0',

235

},

236

},

237

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):
    """Split an innertube client identifier into ``(name, base, variant)``.

    Two spellings are handled:

    * ``"<left>.<right>"`` -- split on the last dot; the result is
      ``(left, right, left)``.
    * ``"<base>_<variant>"`` -- split on the first underscore; the result is
      ``(client_name, base, variant)``, with ``variant`` set to ``None`` when
      the name contains no underscore.
    """
    variant, *base = client_name.rsplit('.', 1)
    if base:
        return variant, base[0], variant
    base, *variant = client_name.split('_', 1)
    return client_name, base, variant[0] if variant else None

248

249

250

def build_innertube_clients():
    """Fill in defaults and a ``priority`` for every entry of ``INNERTUBE_CLIENTS``.

    Mutates the module-level ``INNERTUBE_CLIENTS`` dict in place:

    * missing ``INNERTUBE_API_KEY``, ``INNERTUBE_HOST``, ``REQUIRE_JS_PLAYER``
      and client ``hl`` values receive defaults;
    * each client gets ``priority = 10 * priority(base_client)``, lowered by
      2 for ``*_embedded`` variants and by 3 for any other variant;
    * for every client without a variant, a ``<client>_embedscreen`` copy is
      registered with ``clientScreen = 'EMBED'`` and a ``thirdParty`` context.
    """
    THIRD_PARTY = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
    # NOTE(review): ranking semantics come from utils.qualities -- presumably
    # earlier entries of BASE_CLIENTS rank higher; confirm against that helper.
    priority = qualities(BASE_CLIENTS[::-1])

    # Iterate over a snapshot: new "*_embedscreen" keys are inserted in the loop.
    for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
        ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(client)
        ytcfg['priority'] = 10 * priority(base_client)

        if not variant:
            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
            embedscreen['priority'] -= 3
        elif variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
            ytcfg['priority'] -= 2
        else:
            ytcfg['priority'] -= 3

276

277

278

# Populate defaults, priorities and the derived "*_embedscreen" clients at import time.
build_innertube_clients()

279

280

281

class BadgeType(enum.Enum):
    """Kinds of badge distinguished by the extractor; values are auto-assigned."""

    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()
    LIVE_NOW = enum.auto()

288

289

290

class YoutubeBaseInfoExtractor(InfoExtractor):

291

"""Provide base functions for Youtube extractors"""

292

293

# Regex alternation of path names that are reserved words.
# The literal was cut off after its first line (unclosed parenthesis); the remaining
# alternatives are restored here.
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
    r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
    r'browse|oembed|get_video_info|iframe_api|s/player|source|'
    r'storefront|oops|index|account|t/terms|about|upload|signin|logout')

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

300

301

# _NETRC_MACHINE = 'youtube'

302

303

# If True it will raise an error if no login info is provided

304

_LOGIN_REQUIRED = False

305

306

_INVIDIOUS_SITES = (

307

# invidious-redirect websites

308

r'(?:www\.)?redirect\.invidious\.io',

309

r'(?:(?:www|dev)\.)?invidio\.us',

310

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

311

r'(?:www\.)?invidious\.pussthecat\.org',

312

r'(?:www\.)?invidious\.zee\.li',

313

r'(?:www\.)?invidious\.ethibox\.fr',

314

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

315

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

316

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

317

# youtube-dl invidious instances list

318

r'(?:(?:www|no)\.)?invidiou\.sh',

319

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

320

r'(?:www\.)?invidious\.kabi\.tk',

321

r'(?:www\.)?invidious\.mastodon\.host',

322

r'(?:www\.)?invidious\.zapashcanon\.fr',

323

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

324

r'(?:www\.)?invidious\.tinfoil-hat\.net',

325

r'(?:www\.)?invidious\.himiko\.cloud',

326

r'(?:www\.)?invidious\.reallyancient\.tech',

327

r'(?:www\.)?invidious\.tube',

328

r'(?:www\.)?invidiou\.site',

329

r'(?:www\.)?invidious\.site',

330

r'(?:www\.)?invidious\.xyz',

331

r'(?:www\.)?invidious\.nixnet\.xyz',

332

r'(?:www\.)?invidious\.048596\.xyz',

333

r'(?:www\.)?invidious\.drycat\.fr',

334

r'(?:www\.)?inv\.skyn3t\.in',

335

r'(?:www\.)?tube\.poal\.co',

336

r'(?:www\.)?tube\.connect\.cafe',

337

r'(?:www\.)?vid\.wxzm\.sx',

338

r'(?:www\.)?vid\.mint\.lgbt',

339

r'(?:www\.)?vid\.puffyan\.us',

340

r'(?:www\.)?yewtu\.be',

341

r'(?:www\.)?yt\.elukerio\.org',

342

r'(?:www\.)?yt\.lelux\.fi',

343

r'(?:www\.)?invidious\.ggc-project\.de',

344

r'(?:www\.)?yt\.maisputain\.ovh',

345

r'(?:www\.)?ytprivate\.com',

346

r'(?:www\.)?invidious\.13ad\.de',

347

r'(?:www\.)?invidious\.toot\.koeln',

348

r'(?:www\.)?invidious\.fdn\.fr',

349

r'(?:www\.)?watch\.nettohikari\.com',

350

r'(?:www\.)?invidious\.namazso\.eu',

351

r'(?:www\.)?invidious\.silkky\.cloud',

352

r'(?:www\.)?invidious\.exonip\.de',

353

r'(?:www\.)?invidious\.riverside\.rocks',

354

r'(?:www\.)?invidious\.blamefran\.net',

355

r'(?:www\.)?invidious\.moomoo\.de',

356

r'(?:www\.)?ytb\.trom\.tf',

357

r'(?:www\.)?yt\.cyberhost\.uk',

358

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

359

r'(?:www\.)?qklhadlycap4cnod\.onion',

360

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

361

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

362

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

363

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

364

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

365

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

366

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

367

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

368

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

369

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

370

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

371

r'(?:www\.)?piped\.kavin\.rocks',

372

r'(?:www\.)?piped\.silkky\.cloud',

373

r'(?:www\.)?piped\.tokhmi\.xyz',

374

r'(?:www\.)?piped\.moomoo\.me',

375

r'(?:www\.)?il\.ax',

376

r'(?:www\.)?piped\.syncpundit\.com',

377

r'(?:www\.)?piped\.mha\.fi',

378

r'(?:www\.)?piped\.mint\.lgbt',

379

r'(?:www\.)?piped\.privacy\.com\.de',

380

)

381

382

# extracted from account/account_menu ep

383

# XXX: These are the supported YouTube UI and API languages,

384

# which is slightly different from languages supported for translation in YouTube studio

385

_SUPPORTED_LANG_CODES = [

386

'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',

387

'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',

388

'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',

389

'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',

390

'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',

391

'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'

392

]

393

394

_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}

395

396

@functools.cached_property
def _preferred_lang(self):
    """
    Language code taken from the `lang` extractor argument (ie_key 'Youtube').

    Returns None if the argument is unset or empty.
    Raises an expected ExtractorError if the code is not in _SUPPORTED_LANG_CODES.
    Reports a warning for any supported language other than 'en'.
    """
    lang_code = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not lang_code:
        return None

    if lang_code not in self._SUPPORTED_LANG_CODES:
        supported = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")
        raise ExtractorError(
            f'Unsupported language code: {lang_code}. Supported language codes (case-sensitive): {supported}.',
            expected=True)

    if lang_code != 'en':
        self.report_warning(
            f'Preferring "{lang_code}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return lang_code

413

414

def _initialize_consent(self):
    """
    Set a 'YES' CONSENT cookie on .youtube.com unless one is not needed.

    Nothing is done when a __Secure-3PSID cookie exists or the CONSENT cookie
    already contains 'YES'. The id of a PENDING consent cookie is reused when
    present; otherwise a random three-digit id is generated.
    """
    yt_cookies = self._get_cookies('https://www.youtube.com/')
    if yt_cookies.get('__Secure-3PSID'):
        return

    consent_cookie = yt_cookies.get('CONSENT')
    pending_id = None
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    consent_suffix = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_suffix)

428

429

def _initialize_pref(self):
    """
    Rewrite the PREF cookie on .youtube.com with forced language and timezone.

    Existing PREF values are kept when the cookie can be parsed as a query
    string; 'hl' is set to the preferred language (default 'en') and 'tz' to 'UTC'.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if pref_cookie:
        try:
            settings = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            # Keep going with an empty PREF rather than failing initialization
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    settings['hl'] = self._preferred_lang or 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))

440

441

def _real_initialize(self):
    """Run the initialization steps in order: PREF cookie, CONSENT cookie, login check."""
    init_steps = (
        self._initialize_pref,
        self._initialize_consent,
        self._check_login_required,
    )
    for step in init_steps:
        step()

445

446

def _check_login_required(self):
    """Call raise_login_required() if this extractor needs login and no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')

449

450

# Matches the assignment prefix (`ytInitialData =` or `window["ytInitialData"] =`);
# extract_yt_initial_data() passes it to _search_json() to locate the JSON that follows
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

451

# Matches the `ytInitialPlayerResponse =` assignment prefix
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

452

453

def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for `client`, safe to mutate."""
    client_defaults = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(client_defaults)

455

456

def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the INNERTUBE_CLIENTS entry for `client`."""
    client_defaults = INNERTUBE_CLIENTS[client]
    return client_defaults['INNERTUBE_HOST']

458

459

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get() on `ytcfg`, falling back to the default config of `default_client`
    when the lookup on `ytcfg` yields a falsy result.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

463

464

def _extract_client_name(self, ytcfg, default_client='web'):
    """
    Return the client name string from `ytcfg` ('INNERTUBE_CLIENT_NAME' or the
    context's 'clientName'), falling back to the default config of `default_client`.
    """
    name_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, str, default_client)

468

469

def _extract_client_version(self, ytcfg, default_client='web'):
    """
    Return the client version string from `ytcfg` ('INNERTUBE_CLIENT_VERSION' or the
    context's 'clientVersion'), falling back to the default config of `default_client`.
    """
    version_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, str, default_client)

473

474

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname. Priority: the `innertube_host` extractor argument,
    then `req_api_hostname`, then the host of `default_client` (or 'web').
    """
    user_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if user_host:
        return user_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

477

478

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, or from the default config of `default_client`."""
    return self._ytcfg_get_safe(
        ytcfg, lambda cfg: cfg['INNERTUBE_API_KEY'], str, default_client)

480

481

def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict looked up with get_first() over `ytcfg`
    and the default config of `default_client`, with language and timezone
    overridden in its 'client' dict.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    forced_settings = {
        'hl': self._preferred_lang or 'en',
        'timeZone': 'UTC',
        'utcOffsetMinutes': 0,
    }
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context.update(forced_settings)
    return context

# SAPISID cookie value cache used by _generate_sapisidhash_header():
# None = not looked up yet, False = no usable cookie found, otherwise the cookie value
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for `origin`.

    The SAPISID value is read from the youtube.com cookies on first use and
    remembered in self._SAPISID. Returns None when no such cookie is available.
    """
    timestamp = round(time.time())
    if self._SAPISID is None:
        cookie_jar = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        source_cookie = dict_get(cookie_jar, ('__Secure-3PAPISID', 'SAPISID'))
        if not (source_cookie and source_cookie.value):
            # Remember the miss so the cookies are not searched again
            self._SAPISID = False
        else:
            self._SAPISID = source_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not cookie_jar.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=timestamp + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    hash_input = f'{timestamp} {self._SAPISID} {origin}'
    digest = hashlib.sha1(hash_input.encode()).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'

515

516

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged with the request context) as JSON to
    https://<api host>/youtubei/v1/<ep> and return the result of _download_json().

    The `innertube_key` extractor argument takes priority over `api_key`, which
    takes priority over the key of `default_client`. `headers` are applied on
    top of the generated API headers.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    user_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = user_key or api_key or self._extract_api_key(default_client=default_client)

    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})

533

534

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Return the JSON that follows the ytInitialData assignment in `webpage`, via _search_json()."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)

536

537

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among `data` that converts to an int, else None.
    """
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in candidates if index is not None), None)

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """
    Return the identity token: 'ID_TOKEN' from `ytcfg` if it is a non-empty string,
    otherwise the ID_TOKEN value searched for in `webpage`. None if neither yields one.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

558

559

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg

    Returns the first id found among the arguments, or None.
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(source, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
    return None

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)

587

588

@functools.cached_property
def is_authenticated(self):
    """True if a SAPISIDHASH authorization header can be generated from the cookies."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

591

592

def extract_ytcfg(self, video_id, webpage):
    """
    Return the dict passed to `ytcfg.set({...});` in `webpage`.

    Returns an empty dict when `webpage` is empty, when no such call is found,
    or when the captured text cannot be parsed as JSON.
    """
    if not webpage:
        return {}
    # The parentheses of the call must be matched literally (`\(` / `\)`);
    # `$` anchors in their place can never match inside a `ytcfg.set(...)` call
    ytcfg_json = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(ytcfg_json, video_id, fatal=False) or {}

599

600

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an API request.

    Explicit keyword arguments take priority over values extracted from `ytcfg`.
    Authorization and X-Origin are only added when a SAPISIDHASH value can be
    generated. The result is passed through filter_dict() before being returned.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    api_headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    # With an account syncid but no known session index, account 0 is assumed
    if session_index is not None:
        api_headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        api_headers['X-Goog-AuthUser'] = 0

    auth_value = self._generate_sapisidhash_header(origin)
    if auth_value is not None:
        api_headers['Authorization'] = auth_value
        api_headers['X-Origin'] = origin
    return filter_dict(api_headers)

625

626

def _download_ytcfg(self, client, video_id):
    """
    Download the web page belonging to `client` and return the ytcfg found in it.

    Returns an empty dict for clients without a known page, or when nothing
    could be extracted.
    """
    client_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
    }
    page_url = client_pages.get(client)
    if not page_url:
        return {}

    client_label = client.replace("_", " ").strip()
    webpage = self._download_webpage(
        page_url, video_id, fatal=False, note=f'Downloading {client_label} client config')
    return self.extract_ytcfg(video_id, webpage) or {}

637

638

@staticmethod

639

def _build_api_continuation_query(continuation, ctp=None):

640

query = {

641

'continuation': continuation

642

}

643

# TODO: Inconsistency with clickTrackingParams.

644

# Currently we have a fixed ctp contained within context (from ytcfg)

645

# and a ctp in root query for continuation.

646

if ctp:

647

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's 'nextContinuationData' or
    'reloadContinuationData'. Returns None when no continuation token is found.
    """
    continuation_data = try_get(
        renderer, (
            lambda x: x['continuations'][0]['nextContinuationData'],
            lambda x: x['continuation']['reloadContinuationData'],
        ), dict) or {}
    token = continuation_data.get('continuation')
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))

662

663

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.
    Returns None for non-dict input or when the endpoint carries no string token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))

672

673

@classmethod
def _extract_continuation(cls, renderer):
    """
    Return a continuation query for `renderer`: first from its next/reload
    continuation data, otherwise from a 'continuationItemRenderer' found under
    its 'contents', 'items' or 'rows'.
    """
    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, endpoint_path,
        get_all=False, expected_type=cls._extract_continuation_ep_data)

683

684

@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every alert in data['alerts'] that has both
    a 'type' and a non-empty text. Non-dict entries of the list are skipped.
    """
    alert_groups = try_get(data, lambda x: x['alerts'], list) or []
    for group in alert_groups:
        if isinstance(group, dict):
            for alert in group.values():
                kind = alert.get('type')
                text = cls._get_text(alert, 'text') if kind else None
                if text:
                    yield kind, text

696

697

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (alert_type, message) pairs.

    With `fatal`, alerts of type 'error' (case-insensitive) are errors: the last
    one is raised as ExtractorError, the others are reported as warnings. All
    remaining alerts are reported as warnings unless listed in _IGNORED_WARNINGS.
    """
    fatal_alerts, other_alerts = [], []
    for kind, message in alerts:
        if kind.lower() == 'error' and fatal:
            fatal_alerts.append((kind, message))
        elif message not in self._IGNORED_WARNINGS:
            other_alerts.append((kind, message))

    # Only the last error is raised; everything before it is downgraded to a warning
    for kind, message in other_alerts + fatal_alerts[:-1]:
        self.report_warning(f'YouTube said: {kind} - {message}', only_once=only_once)
    if fatal_alerts:
        raise ExtractorError('YouTube said: %s' % fatal_alerts[-1][1], expected=expected)

709

710

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and hand them to _report_alerts() with the given options."""
    found_alerts = self._extract_alerts(data)
    return self._report_alerts(found_alerts, *args, **kwargs)

712

713

def _extract_badges(self, renderer: dict):
    """
    Return a list of {'type': BadgeType} dicts for the 'metadataBadgeRenderer'
    badges of `renderer`.

    The type is determined from the badge's icon type, then its style, and
    finally by keyword matching on its label. Badges matching none are ignored.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Use the type mapped to the matched keyword; `badge_type` is
                # always falsy here, so appending it would record no type at all
                badges.append({'type': label_badge_type})
                # One badge yields at most one entry
                break

    return badges

@staticmethod
def _has_badge(badges, badge_type):
    """True if any entry of `badges` has its 'type' equal to `badge_type`."""
    matching = traverse_obj(badges, lambda _, v: v['type'] == badge_type)
    return bool(matching)

756

757

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the paths in `path_list`
    (or in `data` itself when no path is given).

    The text of an object is its 'simpleText', or else the concatenation of the
    'text' of its 'runs', limited to the first `max_runs` runs if given.
    Returns None when no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                # A non-branching path yields a single object rather than a list of them
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                # The item may itself be the list of runs
                runs = item

            runs = runs[:min(len(runs), max_runs or len(runs))]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined

def _get_count(self, data, *path_list):
    """
    Return the count parsed from the text found at `path_list`.

    parse_count() is tried first; if it returns None, the leading run of digits
    and commas of the whitespace-stripped text is converted with str_to_int().
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed

    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(
        self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key

    Returns a list of dicts with 'url', 'height' and 'width'; entries without a
    valid URL are skipped.
    """
    results = []
    for path in path_list or [()]:
        for entry in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.partition('?')[0]
            results.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns the result of datetime_from_str() for the matched expression, or
    None when the text contains no relative time or it cannot be converted.
    """
    # Either a keyword ('today', 'yesterday', 'now') or '<number> <unit>(s) ago'
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None

    start = mobj.group('start')
    if start:
        return datetime_from_str(start)
    try:
        return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
    except ValueError:
        return None

def _parse_time_text(self, text):
    """
    Convert a time text into a timestamp.

    A relative time ('6 days ago') is tried first, then unified_timestamp() on
    the whole text and on a date-like part of it. Returns None for empty text
    or when nothing could be parsed; in the latter case a warning is reported
    if the preferred language is unset or 'en'.
    """
    if not text:
        return None

    relative_dt = self.extract_relative_time(text)
    if isinstance(relative_dt, datetime.datetime):
        timestamp = calendar.timegm(relative_dt.timetuple())
    else:
        timestamp = None

    if timestamp is None:
        date_patterns = (
            r'([a-z]+\s*\d{1,2},?\s*20\d{2})',
            r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)',
        )
        timestamp = (
            unified_timestamp(text) or unified_timestamp(
                self._search_regex(date_patterns, text.lower(), 'time text', default=None)))

    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp

846

847

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` through _call_api(), retrying via self.RetryManager().

    An attempt is retried on network errors other than HTTP 403/429, on alerts
    containing 'unknown error', and when the response has nothing at
    `check_get_keys`. Any other failure is passed to _error_or_warning() with
    `fatal`, and its result is returned.
    """
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            cause = err.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(cause, urllib.error.HTTPError):
                retry.error = err
                continue

            body_head = cause.read(512)
            if not is_html(body_head):
                # A non-HTML error body may be JSON carrying an error message
                error_json = self._parse_json(
                    self._webpage_read_content(cause, None, item_id, prefix=body_head) or '{}',
                    item_id, fatal=False)
                yt_error = try_get(error_json, lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.code not in (403, 429):
                retry.error = err
                continue
            return self._error_or_warning(err, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in err.msg.lower():
                retry.error = err
                continue
            return self._error_or_warning(err, fatal=fatal)

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response

@staticmethod
def is_music_url(url):
    """True if `url` starts with http(s)://music.youtube.com/."""
    music_match = re.match(r'https?://music\.youtube\.com/', url)
    return bool(music_match)

902

903

def _extract_video(self, renderer):
    """
    Build a 'url'-type result dict (handled by YoutubeIE) from a video renderer.

    The URL points to /shorts/ when the renderer is marked as a short, else to
    /watch. 'upload_date' is only filled in when the `approximate_date`
    extractor argument (ie_key 'youtubetab') is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration contained in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    time_text = self._get_text(renderer, 'publishedTimeText') or ''
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(self._parse_time_text(time_text), '%Y%m%d')
    else:
        upload_date = None

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        # A missing badge is passed as None rather than False
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

966

# Description string for this extractor.
# NOTE(review): presumably surfaced in extractor listings - confirm against InfoExtractor.
IE_DESC = 'YouTube'

967

# Verbose (re.X) pattern for the URLs this extractor accepts.
#
# It matches, in order of the alternatives below:
#   * watch / embed / shorts style URLs on youtube.com (any letter case, optional
#     subdomain, -nocookie / kids variants) and on the listed mirror hosts,
#   * short-link style hosts (youtu.be, vid.plus, zwearz.com/watch, invidious sites),
#   * cleanvideosearch.com watch links,
#   * or no prefix at all, i.e. a bare 11-character video ID.
# The video ID is captured in the named group ``id``.
#
# The two ``%(invidious)s`` placeholders are filled with an alternation joined from
# ``YoutubeBaseInfoExtractor._INVIDIOUS_SITES``. Whitespace and ``#`` comments inside
# the literal are ignored by the regex engine because of the ``(?x)`` flag; literal
# ``#`` characters are therefore written as ``\#``.
_VALID_URL = r"""(?x)^
    (
        (?:https?://|//)                                    # http(s):// or protocol-independent URL
        (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
           (?:www\.)?deturl\.com/www\.youtube\.com|
           (?:www\.)?pwnyoutube\.com|
           (?:www\.)?hooktube\.com|
           (?:www\.)?yourepeat\.com|
           tube\.majestyc\.net|
           %(invidious)s|
           youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
        (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
        (?:                                                  # the various things that can precede the ID:
            (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream))  # v/ or embed/ or e/ or shorts/
            |(?:                                             # or the v= param in all its forms
                (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
                v=
            )
        ))
        |(?:
           youtu\.be|                                        # just youtu.be/xxxx
           vid\.plus|                                        # or vid.plus/xxxx
           zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
           %(invidious)s
        )/
        |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
        )
    )?                                                       # all until now is optional -> you can pass the naked ID
    (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
    (?(1).+)?                                                # if we found the ID, everything can follow
    (?:\#|$)""" % {
    'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}

# Patterns that locate YouTube embeds inside arbitrary HTML/JS text.
# Each pattern captures the embedded URL in the named group ``url``.
_EMBED_REGEX = [
    # Generic embeds: an attribute or call that introduces a quoted youtube.com
    # (or youtube-nocookie.com) /embed/, /v/ or /p/ URL. Group 1 is the opening
    # quote, and the URL runs lazily up to the matching closing quote (``\1``).
    r'''(?x)
        (?:
            <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''',
    # https://wordpress.org/plugins/lazy-load-for-videos/
    # An <a> tag whose href is a plain watch URL, followed by a class attribute
    # containing a "lazy-load-youtube..." class name.
    r'''(?xs)
        <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
        \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
]

# Regular expressions that pull a player identifier out of a player script path.
# Every pattern exposes the identifier as the named group ``id``:
#   1. ".../s/player/<id>/player..."                      (id: 8+ chars of [a-zA-Z0-9_-])
#   2. ".../<id>/player_ias.vflset[/<locale>]/base.js" or
#      ".../<id>/player-plasma-ias-(phone|tablet)-<ll_CC>.vflset/base.js"
#   3. any "...vfl<chars>....js" name, where the id is the whole "vfl..." token
# NOTE(review): presumably tried in this order against the player URL - confirm at the call site.
_PLAYER_INFO_RE = (
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)

1027

# Static metadata keyed by format identifier (itag, as a string).
# Each value is a dict of the properties known for that identifier: container
# extension, dimensions, audio/video codec, audio bitrate, fps, a human-readable
# note and, for some groups, a negative 'preference'.
# NOTE(review): presumably used to fill in fields the server response omits - confirm at the call site.
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # Dash mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # Dash webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # Dash webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # Dash webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
    '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
    '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
    '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
    '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}

1136

# Fixed tuple of subtitle format names.
# NOTE(review): presumably the formats offered for each caption track - confirm at the call site.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# NOTE(review): looks like this switches off an inherited geo-bypass default for this extractor - confirm against InfoExtractor.
_GEO_BYPASS = False

# Name string for this extractor.
IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1148

'uploader': 'Philipp Hagemeister',

1149

'uploader_id': 'phihag',

1150

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1151

'channel': 'Philipp Hagemeister',

1152

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1153

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1154

'upload_date': '20121002',

1155

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1156

'categories': ['Science & Technology'],

1157

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1162

'playable_in_embed': True,

1163

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1164

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1169

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1174

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1179

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1180

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1181

'uploader': 'SET India',

1182

'uploader_id': 'setindia',

1183

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1184

'age_limit': 18,

1185

},

1186

'skip': 'Private video',

1187

},

1188

{

1189

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1190

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1195

'uploader': 'Philipp Hagemeister',

1196

'uploader_id': 'phihag',

1197

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1198

'channel': 'Philipp Hagemeister',

1199

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1200

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1201

'upload_date': '20121002',

1202

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1203

'categories': ['Science & Technology'],

1204

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1209

'playable_in_embed': True,

1210

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1211

'live_status': 'not_live',

1212

'age_limit': 0,

1213

'comment_count': int,

1214

'channel_follower_count': int

1215

},

1216

'params': {

1217

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1222

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1227

'uploader_id': '8KVIDEO',

1228

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1229

'description': '',

1230

'uploader': '8KVIDEO',

1231

'title': 'UHDTV TEST 8K VIDEO.mp4'

1232

},

1233

'params': {

1234

'youtube_include_dash_manifest': True,

1235

'format': '141',

1236

},

1237

'skip': 'format 141 not served anymore',

1238

},

1239

# DASH manifest with encrypted signature

1240

{

1241

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1246

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1247

'duration': 244,

1248

'uploader': 'AfrojackVEVO',

1249

'uploader_id': 'AfrojackVEVO',

1250

'upload_date': '20131011',

1251

'abr': 129.495,

1252

'like_count': int,

1253

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1254

'playable_in_embed': True,

1255

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1256

'view_count': int,

1257

'track': 'The Spark',

1258

'live_status': 'not_live',

1259

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1260

'channel': 'Afrojack',

1261

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1262

'tags': 'count:19',

1263

'availability': 'public',

1264

'categories': ['Music'],

1265

'age_limit': 0,

1266

'alt_title': 'The Spark',

1267

'channel_follower_count': int

1268

},

1269

'params': {

1270

'youtube_include_dash_manifest': True,

1271

'format': '141/bestaudio[ext=m4a]',

1272

},

1273

},

1274

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1275

{

1276

'note': 'Embed allowed age-gate video',

1277

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1282

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1283

'duration': 142,

1284

'uploader': 'The Witcher',

1285

'uploader_id': 'WitcherGame',

1286

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1287

'upload_date': '20140605',

1288

'age_limit': 18,

1289

'categories': ['Gaming'],

1290

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1291

'availability': 'needs_auth',

1292

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1293

'like_count': int,

1294

'channel': 'The Witcher',

1295

'live_status': 'not_live',

1296

'tags': 'count:17',

1297

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1298

'playable_in_embed': True,

1299

'view_count': int,

1300

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1305

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1310

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1311

'upload_date': '20200408',

1312

'uploader_id': 'FlyingKitty900',

1313

'uploader': 'FlyingKitty',

1314

'age_limit': 18,

1315

'availability': 'needs_auth',

1316

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1317

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1318

'channel': 'FlyingKitty',

1319

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1320

'view_count': int,

1321

'categories': ['Entertainment'],

1322

'live_status': 'not_live',

1323

'tags': ['Flyingkitty', 'godzilla 2'],

1324

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1325

'like_count': int,

1326

'duration': 177,

1327

'playable_in_embed': True,

1328

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1333

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1334

'info_dict': {

1335

'id': 'Tq92D6wQ1mg',

1336

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1337

'ext': 'mp4',

1338

'upload_date': '20191228',

1339

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1340

'uploader': 'Projekt Melody',

1341

'description': 'md5:17eccca93a786d51bc67646756894066',

1342

'age_limit': 18,

1343

'like_count': int,

1344

'availability': 'needs_auth',

1345

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1346

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1347

'view_count': int,

1348

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1349

'channel': 'Projekt Melody',

1350

'live_status': 'not_live',

1351

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1352

'playable_in_embed': True,

1353

'categories': ['Entertainment'],

1354

'duration': 106,

1355

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1356

'comment_count': int,

1357

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1362

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1367

'uploader': 'Herr Lurik',

1368

'uploader_id': 'st3in234',

1369

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1370

'upload_date': '20130730',

1371

'track': 'Such mich find mich',

1372

'age_limit': 0,

1373

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1374

'like_count': int,

1375

'playable_in_embed': False,

1376

'creator': 'OOMPH!',

1377

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1378

'view_count': int,

1379

'alt_title': 'Such mich find mich',

1380

'duration': 210,

1381

'channel': 'Herr Lurik',

1382

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1383

'categories': ['Music'],

1384

'availability': 'public',

1385

'uploader_url': 'http://www.youtube.com/user/st3in234',

1386

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1387

'live_status': 'not_live',

1388

'artist': 'OOMPH!',

1389

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1394

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1395

'only_matching': True,

1396

},

1397

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1398

# YouTube Red ad is not captured for creator

1399

{

1400

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1406

'uploader_id': 'deadmau5',

1407

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1408

'creator': 'deadmau5',

1409

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1410

'uploader': 'deadmau5',

1411

'title': 'Deadmau5 - Some Chords (HD)',

1412

'alt_title': 'Some Chords',

1413

'availability': 'public',

1414

'tags': 'count:14',

1415

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1416

'view_count': int,

1417

'live_status': 'not_live',

1418

'channel': 'deadmau5',

1419

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1420

'like_count': int,

1421

'track': 'Some Chords',

1422

'artist': 'deadmau5',

1423

'playable_in_embed': True,

1424

'age_limit': 0,

1425

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1426

'categories': ['Music'],

1427

'album': 'Some Chords',

1428

'channel_follower_count': int

1429

},

1430

'expected_warnings': [

1431

'DASH manifest missing',

1432

]

1433

},

1434

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1435

{

1436

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1442

'uploader_id': 'olympic',

1443

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1444

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1445

'uploader': 'Olympics',

1446

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1447

'like_count': int,

1448

'release_timestamp': 1343767800,

1449

'playable_in_embed': True,

1450

'categories': ['Sports'],

1451

'release_date': '20120731',

1452

'channel': 'Olympics',

1453

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1454

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1455

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1456

'age_limit': 0,

1457

'availability': 'public',

1458

'live_status': 'was_live',

1459

'view_count': int,

1460

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1461

'channel_follower_count': int

1462

},

1463

'params': {

1464

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1474

'duration': 85,

1475

'upload_date': '20110310',

1476

'uploader_id': 'AllenMeow',

1477

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1478

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1479

'uploader': '孫ᄋᄅ',

1480

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1481

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1486

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1487

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1488

'view_count': int,

1489

'categories': ['People & Blogs'],

1490

'like_count': int,

1491

'live_status': 'not_live',

1492

'availability': 'unlisted',

1493

'comment_count': int,

1494

'channel_follower_count': int

1495

},

1496

},

1497

# url_encoded_fmt_stream_map is empty string

1498

{

1499

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1504

'description': '',

1505

'upload_date': '20150404',

1506

'uploader_id': 'spbelect',

1507

'uploader': 'Наблюдатели Петербурга',

1508

},

1509

'params': {

1510

'skip_download': 'requires avconv',

1511

},

1512

'skip': 'This live event has ended.',

1513

},

1514

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1515

{

1516

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1521

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1522

'duration': 220,

1523

'upload_date': '20150625',

1524

'uploader_id': 'dorappi2000',

1525

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1526

'uploader': 'dorappi2000',

1527

'formats': 'mincount:31',

1528

},

1529

'skip': 'not actual anymore',

1530

},

1531

# DASH manifest with segment_list

1532

{

1533

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1534

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1539

'uploader': 'Airtek',

1540

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1541

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1542

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1543

},

1544

'params': {

1545

'youtube_include_dash_manifest': True,

1546

'format': '135', # bestvideo

1547

},

1548

'skip': 'This live event has ended.',

1549

},

1550

{

1551

# Multifeed videos (multiple cameras), URL is for Main Camera

1552

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1553

'info_dict': {

1554

'id': 'jvGDaLqkpTg',

1555

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1556

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1563

'description': 'md5:e03b909557865076822aa169218d6a5d',

1564

'duration': 10643,

1565

'upload_date': '20161111',

1566

'uploader': 'Team PGP',

1567

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1568

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1575

'description': 'md5:e03b909557865076822aa169218d6a5d',

1576

'duration': 10991,

1577

'upload_date': '20161111',

1578

'uploader': 'Team PGP',

1579

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1580

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1587

'description': 'md5:e03b909557865076822aa169218d6a5d',

1588

'duration': 10995,

1589

'upload_date': '20161111',

1590

'uploader': 'Team PGP',

1591

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1592

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1599

'description': 'md5:e03b909557865076822aa169218d6a5d',

1600

'duration': 10990,

1601

'upload_date': '20161111',

1602

'uploader': 'Team PGP',

1603

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1604

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1609

},

1610

'skip': 'Not multifeed anymore',

1611

},

1612

{

1613

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1614

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1615

'info_dict': {

1616

'id': 'gVfLd0zydlo',

1617

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1618

},

1619

'playlist_count': 2,

1620

'skip': 'Not multifeed anymore',

1621

},

1622

{

1623

'url': 'https://vid.plus/FlRa-iH7PGw',

1624

'only_matching': True,

1625

},

1626

{

1627

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1628

'only_matching': True,

1629

},

1630

{

1631

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1632

# Also tests cut-off URL expansion in video description (see

1633

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1634

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1635

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1640

'alt_title': 'Dark Walk',

1641

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1642

'duration': 133,

1643

'upload_date': '20151119',

1644

'uploader_id': 'IronSoulElf',

1645

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1646

'uploader': 'IronSoulElf',

1647

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1648

'track': 'Dark Walk',

1649

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1650

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1651

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1652

'categories': ['Film & Animation'],

1653

'view_count': int,

1654

'live_status': 'not_live',

1655

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1656

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1657

'tags': 'count:13',

1658

'availability': 'public',

1659

'channel': 'IronSoulElf',

1660

'playable_in_embed': True,

1661

'like_count': int,

1662

'age_limit': 0,

1663

'channel_follower_count': int

1664

},

1665

'params': {

1666

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1671

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1672

'only_matching': True,

1673

},

1674

{

1675

# Video with yt:stretch=17:0

1676

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1681

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1682

'upload_date': '20151107',

1683

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1684

'uploader': 'CH GAMER DROID',

1685

},

1686

'params': {

1687

'skip_download': True,

1688

},

1689

'skip': 'This video does not exist.',

1690

},

1691

{

1692

# Video with incomplete 'yt:stretch=16:'

1693

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1694

'only_matching': True,

1695

},

1696

{

1697

# Video licensed under Creative Commons

1698

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1703

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1704

'duration': 721,

1705

'upload_date': '20150128',

1706

'uploader_id': 'BerkmanCenter',

1707

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1708

'uploader': 'The Berkman Klein Center for Internet & Society',

1709

'license': 'Creative Commons Attribution license (reuse allowed)',

1710

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1711

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1712

'like_count': int,

1713

'age_limit': 0,

1714

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1715

'channel': 'The Berkman Klein Center for Internet & Society',

1716

'availability': 'public',

1717

'view_count': int,

1718

'categories': ['Education'],

1719

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1720

'live_status': 'not_live',

1721

'playable_in_embed': True,

1722

'comment_count': int,

1723

'channel_follower_count': int

1724

},

1725

'params': {

1726

'skip_download': True,

},

},

{

# Channel-like uploader_url

1731

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1736

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1737

'duration': 4060,

1738

'upload_date': '20151120',

1739

'uploader': 'Bernie Sanders',

1740

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1741

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1742

'license': 'Creative Commons Attribution license (reuse allowed)',

1743

'playable_in_embed': True,

1744

'tags': 'count:12',

1745

'like_count': int,

1746

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1747

'age_limit': 0,

1748

'availability': 'public',

1749

'categories': ['News & Politics'],

1750

'channel': 'Bernie Sanders',

1751

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1752

'view_count': int,

1753

'live_status': 'not_live',

1754

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1755

'comment_count': int,

1756

'channel_follower_count': int

1757

},

1758

'params': {

1759

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1764

'only_matching': True,

1765

},

1766

{

1767

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1768

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1769

'only_matching': True,

1770

},

1771

{

1772

# Rental video preview

1773

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1778

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1779

'upload_date': '20150811',

1780

'uploader': 'FlixMatrix',

1781

'uploader_id': 'FlixMatrixKaravan',

1782

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1783

'license': 'Standard YouTube License',

1784

},

1785

'params': {

1786

'skip_download': True,

1787

},

1788

'skip': 'This video is not available.',

1789

},

1790

{

1791

# YouTube Red video with episode data

1792

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1797

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1798

'duration': 2085,

1799

'upload_date': '20170118',

1800

'uploader': 'Vsauce',

1801

'uploader_id': 'Vsauce',

1802

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1803

'series': 'Mind Field',

1804

'season_number': 1,

1805

'episode_number': 1,

1806

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1807

'tags': 'count:12',

1808

'view_count': int,

1809

'availability': 'public',

1810

'age_limit': 0,

1811

'channel': 'Vsauce',

1812

'episode': 'Episode 1',

1813

'categories': ['Entertainment'],

1814

'season': 'Season 1',

1815

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1816

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1817

'like_count': int,

1818

'playable_in_embed': True,

1819

'live_status': 'not_live',

1820

'channel_follower_count': int

1821

},

1822

'params': {

1823

'skip_download': True,

1824

},

1825

'expected_warnings': [

1826

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1831

# as inappropriate or offensive to some audiences.

1832

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1837

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1838

'duration': 965,

1839

'upload_date': '20140124',

1840

'uploader': 'New Century Foundation',

1841

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1842

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1843

},

1844

'params': {

1845

'skip_download': True,

1846

},

1847

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1852

'only_matching': True,

1853

},

1854

{

1855

# geo restricted to JP

1856

'url': 'sJL6WA-aGkQ',

1857

'only_matching': True,

1858

},

1859

{

1860

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1861

'only_matching': True,

1862

},

1863

{

1864

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1865

'only_matching': True,

1866

},

1867

{

1868

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1869

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1870

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1875

'only_matching': True,

1876

},

1877

{

1878

# Video with unsupported adaptive stream type formats

1879

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1884

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1885

'duration': 433,

1886

'upload_date': '20130923',

1887

'uploader': 'Amelia Putri Harwita',

1888

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1889

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1890

'formats': 'maxcount:10',

1891

},

1892

'params': {

1893

'skip_download': True,

1894

'youtube_include_dash_manifest': False,

1895

},

1896

'skip': 'not actual anymore',

1897

},

1898

{

1899

# Youtube Music Auto-generated description

1900

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1905

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1906

'upload_date': '20190312',

1907

'uploader': 'Stephen - Topic',

1908

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1909

'artist': 'Stephen',

1910

'track': 'Voyeur Girl',

1911

'album': 'it\'s too much love to know my dear',

1912

'release_date': '20190313',

1913

'release_year': 2019,

1914

'alt_title': 'Voyeur Girl',

1915

'view_count': int,

1916

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1917

'playable_in_embed': True,

1918

'like_count': int,

1919

'categories': ['Music'],

1920

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1921

'channel': 'Stephen',

1922

'availability': 'public',

1923

'creator': 'Stephen',

1924

'duration': 169,

1925

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1926

'age_limit': 0,

1927

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1928

'tags': 'count:11',

1929

'live_status': 'not_live',

1930

'channel_follower_count': int

1931

},

1932

'params': {

1933

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1938

'only_matching': True,

1939

},

1940

{

1941

# invalid -> valid video id redirection

1942

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1947

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1948

'upload_date': '20090125',

1949

'uploader': 'Prochorowka',

1950

'uploader_id': 'Prochorowka',

1951

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1952

'artist': 'Panjabi MC',

1953

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1954

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1955

},

1956

'params': {

1957

'skip_download': True,

1958

},

1959

'skip': 'Video unavailable',

1960

},

1961

{

1962

# empty description results in an empty string

1963

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1970

'uploader_id': 'ElevageOrVert',

1971

'uploader': 'ElevageOrVert',

1972

'view_count': int,

1973

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1974

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1975

'like_count': int,

1976

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1977

'tags': [],

1978

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1979

'availability': 'public',

1980

'age_limit': 0,

1981

'categories': ['Pets & Animals'],

1982

'duration': 7,

1983

'playable_in_embed': True,

1984

'live_status': 'not_live',

1985

'channel': 'ElevageOrVert',

1986

'channel_follower_count': int

1987

},

1988

'params': {

1989

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1994

# see [2] for an example with '};' inside ytInitialPlayerResponse

1995

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1996

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1997

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

2002

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

2003

'upload_date': '20130831',

2004

'uploader_id': 'kudvenkat',

2005

'uploader': 'kudvenkat',

2006

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

2007

'like_count': int,

2008

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

2009

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

2010

'live_status': 'not_live',

2011

'categories': ['Education'],

2012

'availability': 'public',

2013

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

2014

'tags': 'count:12',

2015

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

2020

'comment_count': int,

2021

'channel_follower_count': int

2022

},

2023

'params': {

2024

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

2029

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

2030

'only_matching': True,

2031

},

2032

{

2033

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

2034

'only_matching': True,

2035

},

2036

{

2037

# https://github.com/ytdl-org/youtube-dl/pull/28094

2038

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

2044

'upload_date': '20141120',

2045

'uploader': 'The Cinematic Orchestra - Topic',

2046

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2047

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2048

'artist': 'The Cinematic Orchestra',

2049

'track': 'Burn Out',

2050

'album': 'Every Day',

2051

'like_count': int,

2052

'live_status': 'not_live',

2053

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2058

'creator': 'The Cinematic Orchestra',

2059

'channel': 'The Cinematic Orchestra',

2060

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

2061

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2062

'availability': 'public',

2063

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

2064

'categories': ['Music'],

2065

'playable_in_embed': True,

2066

'channel_follower_count': int

2067

},

2068

'params': {

2069

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

2074

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

2075

'only_matching': True,

2076

},

2077

{

2078

# controversial video, requires bpctr/contentCheckOk

2079

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

2084

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

2085

'uploader': 'CBS Mornings',

2086

'uploader_id': 'CBSThisMorning',

2087

'upload_date': '20140716',

2088

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

2089

'duration': 170,

2090

'categories': ['News & Politics'],

2091

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2092

'view_count': int,

2093

'channel': 'CBS Mornings',

2094

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2095

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2096

'age_limit': 18,

2097

'availability': 'needs_auth',

2098

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2099

'like_count': int,

2100

'live_status': 'not_live',

2101

'playable_in_embed': True,

2102

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2107

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2112

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2113

'upload_date': '20201120',

2114

'uploader': 'Walk around Japan',

2115

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2116

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2117

'duration': 1456,

2118

'categories': ['Travel & Events'],

2119

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2120

'view_count': int,

2121

'channel': 'Walk around Japan',

2122

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2123

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2124

'age_limit': 0,

2125

'availability': 'public',

2126

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2127

'live_status': 'not_live',

2128

'playable_in_embed': True,

2129

'channel_follower_count': int

2130

},

2131

'params': {

2132

'skip_download': True,

2133

},

2134

}, {

2135

# Has multiple audio streams

2136

'url': 'WaOKSUlf4TM',

2137

'only_matching': True

2138

}, {

2139

# Requires Premium: has format 141 when requested using YTM url

2140

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2141

'only_matching': True

2142

}, {

2143

# multiple subtitles with same lang_code

2144

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2145

'only_matching': True,

2146

}, {

2147

# Force use android client fallback

2148

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2149

'info_dict': {

2150

'id': 'YOelRv7fMxY',

2151

'title': 'DIGGING A SECRET TUNNEL Part 1',

2152

'ext': '3gp',

2153

'upload_date': '20210624',

2154

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2155

'uploader': 'colinfurze',

2156

'uploader_id': 'colinfurze',

2157

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2158

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2159

'duration': 596,

2160

'categories': ['Entertainment'],

2161

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2162

'view_count': int,

2163

'channel': 'colinfurze',

2164

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2165

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2166

'age_limit': 0,

2167

'availability': 'public',

2168

'like_count': int,

2169

'live_status': 'not_live',

2170

'playable_in_embed': True,

2171

'channel_follower_count': int

2172

},

2173

'params': {

2174

'format': '17', # 3gp format available on android

2175

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2180

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2181

'only_matching': True,

2182

'params': {

2183

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2188

'only_matching': True,

2189

}, {

2190

'note': 'Storyboards',

2191

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2197

'uploader_id': 'scishow',

2198

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2199

'upload_date': '20140324',

2200

'uploader': 'SciShow',

2201

'like_count': int,

2202

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2203

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2204

'view_count': int,

2205

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2206

'playable_in_embed': True,

2207

'tags': 'count:12',

2208

'uploader_url': 'http://www.youtube.com/user/scishow',

2209

'availability': 'public',

2210

'channel': 'SciShow',

2211

'live_status': 'not_live',

2212

'duration': 248,

2213

'categories': ['Education'],

2214

'age_limit': 0,

2215

'channel_follower_count': int

2216

}, 'params': {'format': 'mhtml', 'skip_download': True}

2217

}, {

2218

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2219

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2224

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2225

'uploader': 'Leon Nguyen',

2226

'uploader_id': 'VNSXIII',

2227

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2228

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2229

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2234

'tags': 'count:23',

2235

'playable_in_embed': True,

2236

'live_status': 'not_live',

2237

'upload_date': '20220103',

2238

'like_count': int,

2239

'availability': 'public',

2240

'channel': 'Leon Nguyen',

2241

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2242

'comment_count': int,

2243

'channel_follower_count': int

2244

}

2245

}, {

2246

# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date

2247

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2252

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2253

'uploader': 'Leon Nguyen',

2254

'uploader_id': 'VNSXIII',

2255

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2256

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2257

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2262

'tags': 'count:23',

2263

'playable_in_embed': True,

2264

'live_status': 'not_live',

2265

'upload_date': '20220102',

2266

'like_count': int,

2267

'availability': 'public',

2268

'channel': 'Leon Nguyen',

2269

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2270

'comment_count': int,

2271

'channel_follower_count': int

2272

},

2273

'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}

2274

}, {

2275

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2276

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2281

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2282

'uploader': 'Quackity',

2283

'uploader_id': 'QuackityHQ',

2284

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2285

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2286

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2291

'tags': 'count:26',

2292

'playable_in_embed': True,

2293

'live_status': 'not_live',

2294

'release_timestamp': 1641172509,

2295

'release_date': '20220103',

2296

'upload_date': '20220103',

2297

'like_count': int,

2298

'availability': 'public',

2299

'channel': 'Quackity',

2300

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2301

'channel_follower_count': int

2302

}

2303

},

2304

{ # continuous livestream. Microformat upload date should be preferred.

2305

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2306

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2307

'info_dict': {

2308

'id': 'kgx4WGK0oNU',

2309

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2310

'ext': 'mp4',

2311

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2312

'availability': 'public',

2313

'age_limit': 0,

2314

'release_timestamp': 1637975704,

2315

'upload_date': '20210619',

2316

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2317

'live_status': 'is_live',

2318

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2319

'uploader': '阿鲍Abao',

2320

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2321

'channel': 'Abao in Tokyo',

2322

'channel_follower_count': int,

2323

'release_date': '20211127',

2324

'tags': 'count:39',

2325

'categories': ['People & Blogs'],

2326

'like_count': int,

2327

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2328

'view_count': int,

2329

'playable_in_embed': True,

2330

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2331

},

2332

'params': {'skip_download': True}

2333

}, {

2334

# Story. Requires specific player params to work.

2335

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2340

'view_count': int,

2341

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2342

'upload_date': '20220526',

2343

'categories': ['Education'],

2344

'title': 'Story',

2345

'channel': 'IT\'S HISTORY',

2346

'description': '',

2347

'uploader_id': 'BlastfromthePast',

2348

'duration': 12,

2349

'uploader': 'IT\'S HISTORY',

2350

'playable_in_embed': True,

2351

'age_limit': 0,

2352

'live_status': 'not_live',

2353

'tags': [],

2354

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2355

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2356

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2357

},

2358

'skip': 'stories get removed after some period of time',

2359

}, {

2360

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2365

'upload_date': '20220323',

2366

'like_count': int,

2367

'availability': 'unlisted',

2368

'channel': 'nao20010128nao',

2369

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2370

'age_limit': 0,

2371

'uploader': 'nao20010128nao',

2372

'uploader_id': 'nao20010128nao',

2373

'categories': ['Music'],

2374

'view_count': int,

2375

'description': '',

2376

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2377

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2378

'live_status': 'not_live',

2379

'playable_in_embed': True,

2380

'channel_follower_count': int,

2381

'duration': 6,

2382

'tags': [],

2383

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

2384

}

2385

}, {

2386

# Prefer primary title+description language metadata by default

2387

# Do not prefer translated description if primary is empty

2388

'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',

'info_dict': {

'id': 'el3E4MbxRqQ',

'ext': 'mp4',

'title': 'dlp test video 2 - primary sv no desc',

2393

'description': '',

2394

'channel': 'cole-dlp-test-acc',

2395

'tags': [],

2396

'view_count': int,

2397

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2398

'like_count': int,

2399

'playable_in_embed': True,

2400

'availability': 'unlisted',

2401

'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',

2402

'age_limit': 0,

2403

'duration': 5,

2404

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2405

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2406

'live_status': 'not_live',

2407

'upload_date': '20220908',

2408

'categories': ['People & Blogs'],

2409

'uploader': 'cole-dlp-test-acc',

2410

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2411

},

2412

'params': {'skip_download': True}

2413

}, {

2414

# Extractor argument: prefer translated title+description

2415

'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',

'info_dict': {

'id': 'gHKT4uU8Zng',

'ext': 'mp4',

'channel': 'cole-dlp-test-acc',

2420

'tags': [],

2421

'duration': 5,

2422

'live_status': 'not_live',

2423

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2424

'upload_date': '20220728',

2425

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2426

'view_count': int,

2427

'categories': ['People & Blogs'],

2428

'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',

2429

'title': 'dlp test video title translated (fr)',

2430

'availability': 'public',

2431

'uploader': 'cole-dlp-test-acc',

2432

'age_limit': 0,

2433

'description': 'dlp test video description translated (fr)',

2434

'playable_in_embed': True,

2435

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2436

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2437

},

2438

'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},

2439

'expected_warnings': [r'Preferring "fr" translated fields'],

2440

}, {

2441

'note': '6 channel audio',

2442

'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',

2443

'only_matching': True,

}

]

_WEBPAGE_TESTS = [{
    # Third-party page that carries a YouTube <object> embed
    'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
    # NOTE(review): this value is identical to the hash in 'description' below -
    # looks like a copy-paste; confirm whether a real file checksum was intended.
    'md5': '873c81d308b979f0e23ee7e620b312a3',
    'info_dict': {
        'id': 'msN87y-iEx0',
        'ext': 'mp4',
        'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
        'upload_date': '20080526',
        'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
        'uploader': 'Christopher Sykes',
        'uploader_id': 'ChristopherJSykes',
        'age_limit': 0,
        'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
        'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
        'playable_in_embed': True,
        'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
        'like_count': int,
        'comment_count': int,
        'channel': 'Christopher Sykes',
        'live_status': 'not_live',
        'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
        'availability': 'public',
        'duration': 195,
        'view_count': int,
        'categories': ['Science & Technology'],
        'channel_follower_count': int,
        'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
    },
    'params': {
        'skip_download': True,
    },
}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected outright; for every other URL the decision is delegated to the
    parent class's ``suitable``.
    """
    # NOTE(review): parse_qs is imported inside the method instead of relying
    # on module scope - presumably deliberate; confirm before hoisting it.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if not list_values[0]:
        return super().suitable(url)
    return False

2491

2492

def __init__(self, *args, **kwargs):
    """Run the parent initialiser, then give this instance two empty caches.

    All positional and keyword arguments are forwarded unchanged to the
    parent class.
    """
    super().__init__(*args, **kwargs)
    # Each cache starts out as its own empty dict.
    self._code_cache, self._player_cache = {}, {}

2496

2497

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach from-start fragment sources to the formats flagged 'is_from_start'.

    Only entries of *formats* with a truthy 'is_from_start' value are touched,
    and they are modified in place:

    * 'is_live' is set to *is_live*;
    * when *is_live* is truthy, 'fragments' becomes a partial of
      ``self._live_dash_fragments`` and 'protocol' is set to
      'http_dash_segments_generator';
    * otherwise 'fragments' becomes a ``LazyList`` over that partial called
      with an empty dict, and the 'is_from_start' key is removed.

    Nothing is returned.
    """
    refresh_lock = threading.Lock()
    start_time = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses and re-list the formats, but only
        # once more than `delay` seconds have passed since the last fetch.
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, player_responses, player_url = self._download_player_responses(
            url, smuggled_data, video_id, webpage_url)
        details_path = (..., 'videoDetails')
        microformat_path = (..., 'microformat', 'playerMicroformatRenderer')
        video_details = traverse_obj(
            player_responses, details_path, expected_type=dict, default=[])
        microformats = traverse_obj(
            player_responses, microformat_path, expected_type=dict, default=[])
        _, live_status, _, formats, _ = self._list_formats(
            video_id, microformats, video_details, player_responses, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        # Returns (manifest_url, manifest_stream_number, is_live) for the
        # requested format, or None when that format is not in the list.
        with refresh_lock:
            # Only one caller at a time may run the refetch.
            refetch_manifest(format_id, delay)

        for candidate in formats:
            if candidate['format_id'] == format_id:
                return candidate['manifest_url'], candidate['manifest_stream_number'], is_live

        if is_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt['is_live'] = is_live
        # The copy is taken after 'is_live' is set and before 'fragments' is
        # added; for a live video the last argument is simply False.
        manifestless_fmt = not is_live and fmt.copy()
        fragment_source = functools.partial(
            self._live_dash_fragments, video_id, fmt['format_id'],
            live_start_time, mpd_feed, manifestless_fmt)
        if not is_live:
            fmt['fragments'] = LazyList(fragment_source({}))
            del fmt['is_from_start']
        else:
            fmt['fragments'] = fragment_source
            fmt['protocol'] = 'http_dash_segments_generator'

2544

2545

def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """
    Generate fragment dicts ('url', 'fragment_count') for a live DASH format.

    @param mpd_feed                 callable (format_id, delay) returning
                                    (manifest_url, manifest_stream_number, is_live) or None
    @param manifestless_orig_fmt    format dict used in place of the fetched MPD
                                    when truthy; the loop then runs only once
    @param ctx                      dict; 'start' is read and 'last_error' is popped

    The newest sequence number is taken from the "X-Head-Seqnum" header of the
    last yielded segment when possible, otherwise from the MPD.
    Generation stops when the stream is no longer live or when
    no_fragment_score exceeds 30.
    """
    # Polling interval in seconds, and the oldest span (in seconds; 432000 s = 120 h)
    # that is assumed to be still available
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    # True when the stream started more than MAX_DURATION before the download began
    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
        lack_early_segments = True

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """
        Refresh the manifest state through mpd_feed

        @returns (should_continue, last sequence number)
        NOTE(review): the early returns read last_seq from the enclosing scope,
        which is only bound once the outer loop has assigned it - confirm the
        first call can never reach them
        """
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # Ask for a short refetch delay when forced or when the last error was HTTP 403
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        # When mpd_feed returns None, keep the previous URL/stream and mark as not live
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 2
                return False, last_seq
            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        # The sequence number is embedded in the path of the last fragment as "sq/<n>"
        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        # Give up once too many consecutive attempts produced nothing new
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                # Fall back to the MPD on the next iteration
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        # Make last_seq an exclusive upper bound for the range below
        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            # Do not start earlier than MAX_DURATION worth of fragments before the newest one
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # The exception is only used to leave the for loop and restart the while loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                # Nothing new was available in this round
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        # Wait out the remainder of FETCH_SPAN before polling again
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2655

2656

def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the absolute player JS URL found in the given ytcfgs, or None.

    Looks up 'PLAYER_JS_URL' first, then 'jsUrl' inside
    'WEB_PLAYER_CONTEXT_CONFIGS'. `webpage` is accepted but not used here.
    """
    js_path = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    if js_path:
        return urljoin('https://www.youtube.com', js_path)
    return None

2663

2664

def _download_player_url(self, video_id, fatal=False):
    """
    Build the player base.js URL from the player version found in the iframe API JS.

    Returns None when the download or the version lookup yields nothing.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2673

2674

def _signature_cache_id(self, example_sig):

2675

""" Return a string representation of a signature """

2676

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2677

2678

@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the 'id' group of the first pattern in _PLAYER_INFO_RE matching player_url.

    Raises ExtractorError when no pattern matches.
    """
    candidates = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next(filter(None, candidates), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')

2687

2688

def _load_player(self, video_id, player_url, fatal=True):
    """
    Return the player JS code for player_url, downloading it at most once per player id.

    Successfully downloaded code is kept in self._code_cache; returns None
    when nothing is cached and the download yields nothing.
    """
    player_id = self._extract_player_info(player_url)
    already_cached = player_id in self._code_cache
    if not already_cached:
        downloaded = self._download_webpage(
            player_url, video_id, fatal=fatal,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
        if downloaded:
            self._code_cache[player_id] = downloaded
    return self._code_cache.get(player_id)

2698

2699

def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a function that reorders the characters of a signature.

    The reordering is stored as a list of source indices in the
    'youtube-sigfuncs' filesystem cache; when no entry exists it is derived by
    running the player's signature function on a probe string.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not cache_spec:
        code = self._load_player(video_id, player_url)
        if code:
            sig_func = self._parse_sig_js(code)
            # Character i of the probe has code point i, so the output reveals the permutation
            probe = ''.join(map(chr, range(len(example_sig))))
            cache_spec = [ord(c) for c in sig_func(probe)]
            self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    def apply_spec(s):
        return ''.join(s[i] for i in cache_spec)

    return apply_spec

2718

2719

def _print_sig_code(self, func, example_sig):

2720

if not self.get_param('youtube_print_sig_code'):

2721

return

2722

2723

def gen_sig_code(idxs):

2724

def _genslice(start, end, step):

2725

starts = '' if start == 0 else str(start)

2726

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2727

steps = '' if step == 1 else (':%d' % step)

2728

return f's[{starts}{ends}{steps}]'

2729

2730

step = None

2731

# Quelch pyflakes warnings - start will be set when step is set

2732

start = '(Never used)'

2733

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2738

step = None

2739

continue

2740

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2750

2751

test_string = ''.join(map(chr, range(len(example_sig))))

2752

cache_res = func(test_string)

2753

cache_spec = [ord(c) for c in cache_res]

2754

expr_code = ' + '.join(gen_sig_code(cache_spec))

2755

signature_id_tuple = '(%s)' % (

2756

', '.join(str(len(p)) for p in example_sig.split('.')))

2757

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2758

' return %s\n') % (signature_id_tuple, expr_code)

2759

self.to_screen('Extracted signature function:\n' + code)

2760

2761

def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and return a Python callable for it.

    The name is found with the first matching pattern below and the function is
    then extracted with JSInterpreter. The returned callable takes the
    signature string and passes it as the single JS argument.
    """
    # Literal parentheses in the JS must be escaped as \( and \); an unescaped
    # "$" is an end-of-line anchor and is only valid inside a character class
    # such as [a-zA-Z0-9$]
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])

2784

2785

def _cached(self, func, *cache_id):

2786

def inner(*args, **kwargs):

2787

if cache_id not in self._player_cache:

2788

try:

2789

self._player_cache[cache_id] = func(*args, **kwargs)

2790

except ExtractorError as e:

2791

self._player_cache[cache_id] = e

2792

except Exception as e:

2793

self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)

2794

2795

ret = self._player_cache[cache_id]

2796

if isinstance(ret, Exception):

raise ret

return ret

return inner

def _decrypt_signature(self, s, video_id, player_url):
    """Decrypt the s field with the (cached) signature function of the player"""
    cache_key = ('sig', player_url, self._signature_cache_id(s))
    extract_sig = self._cached(self._extract_signature_function, *cache_key)
    sig_func = extract_sig(video_id, player_url, s)
    self._print_sig_code(sig_func, s)
    return sig_func(s)

2808

2809

def _decrypt_nsig(self, s, video_id, player_url):
    """
    Decrypt the n field using the player's nsig function.

    The function is first run through JSInterpreter; if that raises
    JSInterpreter.Exception, PhantomJS is tried, and the original error is
    re-raised when the PhantomJS wrapper cannot be created.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        nsig_func = extract_nsig(jsi, func_code)
        ret = nsig_func(s)
    except JSInterpreter.Exception as e:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise e
        warning = (
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}')
        self.report_warning(warning, video_id)
        self.write_debug(e)

        args, func_body = func_code
        script = f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));'
        ret = jsi.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret

2842

2843

def _extract_n_function_name(self, jscode):

2844

funcname, idx = self._search_regex(

2845

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

2846

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

if not idx:

return funcname

return json.loads(js_to_json(self._search_regex(

2851

rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,

2852

f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]

2853

2854

def _extract_n_function_code(self, video_id, player_url):
    """
    Return (jsi, player_id, func_code) for the n (nsig) function of the player.

    func_code is an (argument names, function body) pair. It is loaded from
    the 'youtube-nsig' cache when available; otherwise it is extracted from
    the downloaded player JS and stored in the cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    # NOTE(review): on a cache hit the interpreter is built from the cached
    # func_code rather than from the player JS - confirm this is intended
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped since JS identifiers may contain "$", which would
    # otherwise act as a regex anchor
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
                 # NB: The end of the regex is intentionally kept strict
                 {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Normalize to ([argument names], body)
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code

2879

2880

def _extract_n_function_from_code(self, jsi, func_code):
    """
    Build a Python callable from the nsig function code.

    The callable raises JSInterpreter.Exception for any failure, including a
    result that starts with 'enhanced_except_'.
    """
    js_func = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = js_func([s])
        except JSInterpreter.Exception:
            raise
        except Exception as e:
            raise JSInterpreter.Exception(traceback.format_exc(), cause=e)

        if not result.startswith('enhanced_except_'):
            return result
        raise JSInterpreter.Exception('Signature function returned an exception')

    return extract_nsig

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    Taken from ytcfg['STS'] when available, otherwise searched for in the
    player JS.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if code:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts

2920

2921

def _mark_watched(self, video_id, player_responses):
    """
    Request the playback and watchtime tracking URLs of the player response.

    Stops with a warning as soon as one of the URLs is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # more consistent results setting it to right before the end
        reported_len = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_len) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = video_length
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

2961

2962

@classmethod
def _extract_from_webpage(cls, url, webpage):
    """
    Yield url_results for YouTube videos referenced by a webpage.

    An Invidious "alternate" link short-circuits everything else by raising
    cls.StopExtraction() after its single result.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate:
        yield cls.url_result(alternate.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for id_ in lazy_ids:
        yield cls.url_result(unescapeHTML(id_), cls, id_)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_matches:
        # The video id is the last captured group
        importer_id = groups[-1]
        yield cls.url_result(importer_id, cls, importer_id)

2985

2986

@classmethod
def extract_id(cls, url):
    """Return the video id of url; raise ExtractorError when none can be determined"""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

2992

2993

def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar of the given data"""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)

3007

3008

def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Extract chapters from the macro markers lists of the engagement panels.

    Returns the chapters of the first list that yields any, else an empty list.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

3020

3021

def _extract_chapters_from_description(self, description, duration):
    """Extract chapters from "[H:]MM:SS title" lines of the description (non-strict ordering)"""
    # Each match is a (timestamp, title) pair
    timestamp_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')
    return self._extract_chapters(
        timestamp_lines,
        chapter_time=lambda pair: parse_duration(pair[0]),
        chapter_title=lambda pair: pair[1],
        duration=duration, strict=False)

3026

3027

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

3032

'title': chapter_title(chapter),

3033

} for chapter in chapter_list or []]

3034

if not strict:

3035

chapter_list.sort(key=lambda c: c['start_time'] or 0)

3036

3037

chapters = [{'start_time': 0}]

3038

for idx, chapter in enumerate(chapter_list):

3039

if chapter['start_time'] is None:

3040

self.report_warning(f'Incomplete chapter {idx}')

3041

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

3042

chapters.append(chapter)

3043

elif chapter not in chapters:

3044

self.report_warning(

3045

f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')

3046

return chapters[1:]

3047

3048

def _extract_comment(self, comment_renderer, parent=None):
    """
    Convert a comment renderer into a comment dict.

    Returns None when the renderer has no 'commentId'. 'parent' defaults to 'root'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    timestamp = self._parse_time_text(time_text)

    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    vote_getters = (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount'])
    votes = parse_count(try_get(comment_renderer, vote_getters, str)) or 0
    author_thumbnail = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }

3084

3085

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generator of comment dicts for a comment section or a reply thread.

    @param root_continuation_data   renderer from which the first continuation is extracted
    @param parent                   id of the parent comment when extracting replies, else None
    @param tracker                  dict of counters shared across the recursive calls made
                                    for reply threads; created here when not given
    """

    # First value of an extractor argument, '' when it is not set
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Returns the continuation of the requested sort order (or None);
        # also records the expected comment count in the tracker
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields the comments of one page, each followed by its replies.
        # A bare yield (None) tells the caller that the parent limit is reached
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Limited by both the per-thread and the remaining overall reply budget
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Up to four comma-separated limits; missing or invalid ones become sys.maxsize.
    # max_comments itself is not read again in this function
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first response only provides the header (count and sort menu)
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                # A falsy entry signals that the parent comment limit was reached
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    # Surface the message of the comment section when nothing was extracted
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

3230

3231

@staticmethod

3232

def _generate_comment_continuation(video_id):

3233

"""

3234

Generates initial comment section continuation token from given video id

3235

"""

3236

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

3237

return base64.b64encode(token.encode()).decode()

3238

3239

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Returns an iterator over the comments, limited to the first value of the
    'max_comments' extractor argument when it is a valid integer.
    """
    def _real_comment_extract(contents):
        section_renderers = traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
        renderer = None
        for item in section_renderers:
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    first_limit = self._configuration_arg('max_comments', [''])[0]
    max_comments = int_or_none(first_limit)
    return itertools.islice(_real_comment_extract(contents), 0, max_comments)

3249

3250

@staticmethod

3251

def _get_checkok_params():

3252

return {'contentCheckOk': True, 'racyCheckOk': True}

3253

3254

@classmethod
def _generate_player_context(cls, sts=None):
    """
    Build the playback context part of a player request.

    'signatureTimestamp' is only included when sts is not None; the check-ok
    params of the class are merged into the top level.
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    query = {'playbackContext': {'contentPlaybackContext': playback}}
    query.update(cls._get_checkok_params())
    return query

@staticmethod
def _is_agegated(player_response):
    """
    Tell whether the playability status of a player response indicates an age gate.

    True when 'desktopLegacyAgeGateReason' is set or when the status/reason
    contains one of the known age gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False

3279

3280

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of a player response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3283

3284

# Value of the 'params' field that _extract_player_response adds to the player
# query for story URLs and for the android client.
# NOTE(review): the meaning of the encoded value is not visible here
_STORY_PLAYER_PARAMS = '8AEB'

3285

3286

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Download the player API JSON of a video for the given client.

    Returns the response, or None when it is empty.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    # The signature timestamp can only be determined with a player URL
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    base_client = _split_innertube_client(client)[0]
    if smuggled_data.get('is_story') or base_client == 'android':
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note=note)
    return response or None

3307

3308

def _get_requested_clients(self, url, smuggled_data):
    """
    Return the ordered, de-duplicated list of innertube clients to use.

    Clients come from the 'player_client' extractor argument ('default' and
    'all' are expanded, unsupported names are skipped with a warning) and
    fall back to the defaults. For music URLs the existing '<client>_music'
    variants are appended.
    """
    default = ['android', 'web']
    # Clients starting with "_" cannot be requested; highest priority first
    selectable = [name for name in INNERTUBE_CLIENTS if name[:1] != '_']
    allowed_clients = sorted(
        selectable, key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)
    expansions = {'default': default, 'all': allowed_clients}

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client in expansions:
            requested_clients.extend(expansions[client])
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        requested_clients.extend(
            f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)

    return orderedSet(requested_clients)

3331

3332

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for `video_id` from each client in `clients`.

    Clients are tried in the given order. Depending on a response, further
    clients (creator / embedded variants) may be queued while iterating.
    A failing client does not abort the others: the error is remembered and
    only raised at the end if no player response was obtained at all;
    otherwise it is reported as a warning.

    Returns a tuple (player_responses, player_url).
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    seen_clients = set(clients)
    # Used as a stack: reversed so that pop() hands clients out in the given order
    pending = clients[::-1]
    responses = []

    def append_client(*client_names):
        """Queue the first of `client_names` that is a known client and was not used yet"""
        for name in client_names:
            actual = _split_innertube_client(name)[0]
            if actual not in INNERTUBE_CLIENTS or actual in seen_clients:
                continue
            pending.append(name)
            seen_clients.add(actual)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So the initial_pr is always included, with its
    # formats stripped out, even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        responses.append({**initial_pr, 'streamingData': None})

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client, base_client, variant = _split_innertube_client(pending.pop())

        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        if not player_url:
            player_url = self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe fallback is attempted at most once over all clients
        if require_js_player and not (player_url or tried_iframe_fallback):
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as err:
            # Only the most recent error is kept; older ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = err
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if not pr_video_id or pr_video_id == video_id:
                responses.append(pr)
            else:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not responses:
            raise last_error
        self.report_warning(last_error)
    return responses, player_url

3413

3414

def _needs_live_processing(self, live_status, duration):
    """Return `live_status` if the video needs live processing, else None.

    That is the case for an 'is_live' video when the `live_from_start`
    param is set, and for a 'post_live' video longer than 4 hours.
    """
    if live_status == 'is_live':
        needed = self.get_param('live_from_start')
    elif live_status == 'post_live':
        # An unknown duration counts as 0
        needed = (duration or 0) > 4 * 3600
    else:
        needed = False
    return live_status if needed else None

3418

3419

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generate the formats found in `streaming_data`, then the subtitles.

    This is a generator. It yields, in this order:
      1. one dict per usable entry found under the 'formats' and
         'adaptiveFormats' keys of the `streaming_data` items,
      2. for each item of `streaming_data`, the formats of its HLS and DASH
         manifests, unless that manifest type is in `skip_manifests`,
      3. as the very last item, the subtitles dict merged from the manifests.

    `player_url` is needed for signature and nsig decryption. Entries that
    only carry a 'signatureCipher' are dropped when it is missing.
    `duration` is used for the damaged-format check and, together with
    `live_status`, to decide whether live processing is needed.
    """
    # itags: format id -> protocol it was first seen with; stream_ids: ids of entries already yielded
    itags, stream_ids = {}, []
    # Quality labels remembered per itag and per height, used to rate manifest formats below
    itag_qualities, res_qualities = {}, {0: None}
    # NOTE(review): `q` presumably maps a label to its position in this list - confirm against `qualities`
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        # Entries carrying 'targetDurationSec' are not handled by this loop
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may occur once per audio track, so both identify a stream
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        # For 'tiny' or missing quality, prefer the lowercased 'audioQuality' when there is one
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No direct URL: take the URL and the encrypted signature from 'signatureCipher'
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                # The parameter name comes from 'sp', falling back to 'signature'
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            # Replace the 'n' parameter with its decrypted value. On failure the
            # format is kept, but marked as throttled
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                throttled = True

        # Only entries with an itag are recorded for de-duplication
        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        # 10 for the default audio track, -10 for "descriptive" tracks, -1 otherwise
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        # NOTE(review): assumes `duration` is in seconds, making this "less than half the duration" - confirm
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        # Split 'mimeType' into the mime type (group 1) and the optional codecs string (group 2)
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is the video/audio bitrate
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    # Manifest types to leave out: those from the `skip` extractor argument plus the ones added below
    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, itag):
        # Returns False if this itag was already seen with the same protocol.
        # Otherwise sets the format id (suffixed with the protocol when the plain
        # itag is taken) and the quality of `f`, and returns True
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Rate by the label recorded for the itag, else by the label of the closest known height
        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                # The itag of an HLS format is taken from its URL
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    # The file size is read from the "/clen/<n>" part of the URL, if present
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # Always the last item generated. Callers split it off from the formats
    yield subtitles

def _extract_storyboard(self, player_responses, duration):
    """Generate one 'mhtml' format dict per storyboard level.

    The storyboard spec is a '|'-separated string: the first part is the
    URL template, every further part describes one level as 8 fields
    separated by '#'. Levels that do not have exactly 8 fields, or whose
    first 5 fields are not all non-zero integers, are skipped with a warning.
    Nothing is generated if no usable base URL is found.
    """
    raw_spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    # Reversed so that the URL template (first part) can be popped off the end
    levels = raw_spec.split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', levels.pop() or None))
    if not base_url:
        return

    top_level = len(levels) - 1
    for i, level_spec in enumerate(levels):
        fields = level_spec.split('#')
        counts = [int_or_none(value) for value in fields[:5]]
        if not (len(fields) == 8 and all(counts)):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        name_tmpl, sigh = fields[6:]

        # `levels` is reversed, so position i corresponds to level (top_level - i)
        url = base_url.replace('$L', str(top_level - i)).replace('$N', name_tmpl) + f'&sigh={sigh}'
        # Each fragment is one image holding a grid of cols x rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        fragments = [{
            'url': url.replace('$M', str(j)),
            # The last fragment may cover less time than the others
            'duration': min(fragment_duration, duration - (j * fragment_duration)),
        } for j in range(math.ceil(fragment_count))]

        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': fragments,
        }

3660

3661

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    The webpage download is skipped when 'webpage' is listed in the
    `player_skip` extractor argument; `webpage` is None in that case.
    The ytcfg comes from the webpage, falling back to the default ytcfg.

    Returns a tuple (webpage, master_ytcfg, player_responses, player_url).
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None
    if not skip_webpage:
        page_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            page_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=page_query)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url

3677

3678

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Work out the live status of the video and extract its formats.

    The live status is one of 'post_live', 'is_live', 'is_upcoming',
    'was_live', 'not_live', or None when it cannot be decided.

    Returns a tuple
    (live_broadcast_details, live_status, streaming_data, formats, subtitles).
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')

    # Fill in the flags that follow from the others
    if live_content is False or (is_live is None and is_upcoming):
        is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False

    if get_first(video_details, 'isPostLiveDvr'):
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif None in (is_live, is_upcoming, live_content):
        # At least one flag is still unknown
        live_status = None
    elif live_content:
        live_status = 'was_live'
    else:
        live_status = 'not_live'

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator yields all formats, followed by the subtitles as its last item
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    return live_broadcast_details, live_status, streaming_data, formats, subtitles

3700

3701

def _real_extract(self, url):

3702

url, smuggled_data = unsmuggle_url(url, {})

3703

video_id = self._match_id(url)

3704

3705

base_url = self.http_scheme() + '//www.youtube.com/'

3706

webpage_url = base_url + 'watch?v=' + video_id

3707

3708

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3709

3710

playability_statuses = traverse_obj(

3711

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3712

3713

trailer_video_id = get_first(

3714

playability_statuses,

3715

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3716

expected_type=str)

3717

if trailer_video_id:

3718

return self.url_result(

3719

trailer_video_id, self.ie_key(), trailer_video_id)

3720

3721

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3722

if webpage else (lambda x: None))

3723

3724

video_details = traverse_obj(

3725

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3726

microformats = traverse_obj(

3727

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3728

expected_type=dict, default=[])

3729

3730

translated_title = self._get_text(microformats, (..., 'title'))

3731

video_title = (self._preferred_lang and translated_title

3732

or get_first(video_details, 'title') # primary

3733

or translated_title

3734

or search_meta(['og:title', 'twitter:title', 'title']))

3735

translated_description = self._get_text(microformats, (..., 'description'))

3736

original_description = get_first(video_details, 'shortDescription')

3737

video_description = (

3738

self._preferred_lang and translated_description

3739

# If original description is blank, it will be an empty string.

3740

# Do not prefer translated description in this case.

3741

or original_description if original_description is not None else translated_description)

3742

3743

multifeed_metadata_list = get_first(

3744

player_responses,

3745

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3746

expected_type=str)

3747

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3748

if self.get_param('noplaylist'):

3749

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3754

# Unquote should take place before split on comma (,) since textual

3755

# fields may contain comma as well (see

3756

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3757

feed_data = urllib.parse.parse_qs(

3758

urllib.parse.unquote_plus(feed))

3759

3760

def feed_entry(name):

3761

return try_get(

3762

feed_data, lambda x: x[name][0], str)

3763

3764

feed_id = feed_entry('id')

3765

if not feed_id:

3766

continue

3767

feed_title = feed_entry('title')

3768

title = video_title

3769

if feed_title:

3770

title += ' (%s)' % feed_title

3771

entries.append({

3772

'_type': 'url_transparent',

3773

'ie_key': 'Youtube',

3774

'url': smuggle_url(

3775

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3776

{'force_singlefeed': True}),

3777

'title': title,

3778

})

3779

feed_ids.append(feed_id)

3780

self.to_screen(

3781

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3782

% (', '.join(feed_ids), video_id))

3783

return self.playlist_result(

3784

entries, video_id, video_title, video_description)

3785

3786

duration = int_or_none(

3787

get_first(video_details, 'lengthSeconds')

3788

or get_first(microformats, 'lengthSeconds')

3789

or parse_duration(search_meta('duration'))) or None

3790

3791

live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \

3792

self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

3793

if live_status == 'post_live':

3794

self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

3795

3796

if not formats:

3797

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3798

self.report_drm(video_id)

3799

pemr = get_first(

3800

playability_statuses,

3801

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3802

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3803

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3804

if subreason:

3805

if subreason == 'The uploader has not made this video available in your country.':

3806

countries = get_first(microformats, 'availableCountries')

3807

if not countries:

3808

regions_allowed = search_meta('regionsAllowed')

3809

countries = regions_allowed.split(',') if regions_allowed else None

3810

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3811

reason += f'. {subreason}'

3812

if reason:

3813

self.raise_no_formats(reason, expected=True)

3814

3815

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3816

if not keywords and webpage:

3817

keywords = [

3818

unescapeHTML(m.group('content'))

3819

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3820

for keyword in keywords:

3821

if keyword.startswith('yt:stretch='):

3822

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3823

if mobj:

3824

# NB: float is intentional for forcing float division

3825

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3830

f['stretched_ratio'] = ratio

3831

break

3832

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3833

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3834

if thumbnail_url:

3835

thumbnails.append({

3836

'url': thumbnail_url,

3837

})

3838

original_thumbnails = thumbnails.copy()

3839

3840

# The best resolution thumbnails sometimes does not appear in the webpage

3841

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3842

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3843

thumbnail_names = [

3844

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

3845

# in resolution, these are not the custom thumbnail. So de-prioritize them

3846

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3847

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3848

]

3849

n_thumbnail_names = len(thumbnail_names)

3850

thumbnails.extend({

3851

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3852

video_id=video_id, name=name, ext=ext,

3853

webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),

3854

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3855

for thumb in thumbnails:

3856

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3857

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3858

self._remove_duplicate_formats(thumbnails)

3859

self._downloader._sort_thumbnails(original_thumbnails)

3860

3861

category = get_first(microformats, 'category') or search_meta('genre')

3862

channel_id = str_or_none(

3863

get_first(video_details, 'channelId')

3864

or get_first(microformats, 'externalChannelId')

3865

or search_meta('channelId'))

3866

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3867

3868

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3869

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3870

if not duration and live_end_time and live_start_time:

3871

duration = live_end_time - live_start_time

3872

3873

needs_live_processing = self._needs_live_processing(live_status, duration)

3874

3875

def is_bad_format(fmt):

3876

if needs_live_processing and not fmt.get('is_from_start'):

3877

return True

3878

elif (live_status == 'is_live' and needs_live_processing != 'is_live'

3879

and fmt.get('protocol') == 'http_dash_segments'):

3880

return True

3881

3882

for fmt in filter(is_bad_format, formats):

3883

fmt['preference'] = (fmt.get('preference') or -1) - 10

3884

fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')

3885

3886

if needs_live_processing:

3887

self._prepare_live_from_start_formats(

3888

formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

3889

3890

formats.extend(self._extract_storyboard(player_responses, duration))

3891

3892

# source_preference is lower for throttled/potentially damaged formats

3893

self._sort_formats(formats, (

3894

'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3899

'formats': formats,

3900

'thumbnails': thumbnails,

3901

# The best thumbnail that we are sure exists. Prevents unnecessary

3902

# URL checking if user don't care about getting the best possible thumbnail

3903

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3904

'description': video_description,

3905

'uploader': get_first(video_details, 'author'),

3906

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3907

'uploader_url': owner_profile_url,

3908

'channel_id': channel_id,

3909

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

3910

'duration': duration,

3911

'view_count': int_or_none(

3912

get_first((video_details, microformats), (..., 'viewCount'))

3913

or search_meta('interactionCount')),

3914

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3915

'age_limit': 18 if (

3916

get_first(microformats, 'isFamilySafe') is False

3917

or search_meta('isFamilyFriendly') == 'false'

3918

or search_meta('og:restrictions:age') == '18+') else 0,

3919

'webpage_url': webpage_url,

3920

'categories': [category] if category else None,

3921

'tags': keywords,

3922

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3923

'live_status': live_status,

3924

'release_timestamp': live_start_time,

}

subtitles = {}

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3929

if pctr:

3930

def get_lang_code(track):

3931

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3932

or track.get('languageCode'))

3933

3934

# Converted into dicts to remove duplicates

3935

captions = {

3936

get_lang_code(sub): sub

3937

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3938

translation_languages = {

3939

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3940

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3941

3942

def process_language(container, base_url, lang_code, sub_name, query):

3943

lang_subs = container.setdefault(lang_code, [])

3944

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

# NB: Constructing the full subtitle dictionary is slow

3955

get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (

3956

self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

3957

for lang_code, caption_track in captions.items():

3958

base_url = caption_track.get('baseUrl')

3959

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3960

if not base_url:

3961

continue

3962

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3963

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3968

if not caption_track.get('isTranslatable'):

3969

continue

3970

for trans_code, trans_name in translation_languages.items():

3971

if not trans_code:

3972

continue

3973

orig_trans_code = trans_code

3974

if caption_track.get('kind') != 'asr':

3975

if not get_translated_subs:

3976

continue

3977

trans_code += f'-{lang_code}'

3978

trans_name += format_field(lang_name, None, ' from %s')

3979

# Add an "-orig" label to the original language so that it can be distinguished.

3980

# The subs are returned without "-orig" as well for compatibility

3981

if lang_code == f'a-{orig_trans_code}':

3982

process_language(

3983

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3984

# Setting tlang=lang returns damaged subtitles.

3985

process_language(automatic_captions, base_url, trans_code, trans_name,

3986

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3987

3988

info['automatic_captions'] = automatic_captions

3989

info['subtitles'] = subtitles

3990

3991

parsed_url = urllib.parse.urlparse(url)

3992

for component in [parsed_url.fragment, parsed_url.query]:

3993

query = urllib.parse.parse_qs(component)

3994

for k, v in query.items():

3995

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3996

d_k += '_time'

3997

if d_k not in info and k in s_ks:

3998

info[d_k] = parse_duration(query[k][0])

3999

4000

# Youtube Music Auto-generated description

4001

if video_description:

4002

mobj = re.search(

4003

r'''(?xs)

4004

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

4005

(?P<album>[^\n]+)

4006

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

4007

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

4008

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

4009

.+\nAuto-generated\ by\ YouTube\.\s*$

4010

''', video_description)

4011

if mobj:

4012

release_year = mobj.group('release_year')

4013

release_date = mobj.group('release_date')

4014

if release_date:

4015

release_date = release_date.replace('-', '')

4016

if not release_year:

4017

release_year = release_date[:4]

4018

info.update({

4019

'album': mobj.group('album'.strip()),

4020

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

4021

'track': mobj.group('track').strip(),

4022

'release_date': release_date,

4023

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

4029

if not initial_data:

4030

query = {'videoId': video_id}

4031

query.update(self._get_checkok_params())

4032

initial_data = self._extract_response(

4033

item_id=video_id, ep='next', fatal=False,

4034

ytcfg=master_ytcfg, query=query,

4035

headers=self.generate_api_headers(ytcfg=master_ytcfg),

4036

note='Downloading initial data API JSON')

4037

4038

info['comment_count'] = traverse_obj(initial_data, (

4039

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

4040

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

4041

), (

4042

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

4043

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

4044

), expected_type=int_or_none, get_all=False)

4045

4046

try: # This will error if there is no livechat

4047

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

4048

except (KeyError, IndexError, TypeError):

4049

pass

4050

else:

4051

info.setdefault('subtitles', {})['live_chat'] = [{

4052

# url is needed to set cookies

4053

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

4054

'video_id': video_id,

4055

'ext': 'json',

4056

'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')

4057

else 'youtube_live_chat_replay'),

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

4063

or self._extract_chapters_from_engagement_panel(initial_data, duration)

4064

or self._extract_chapters_from_description(video_description, duration)

4065

or None)

4066

4067

contents = traverse_obj(

4068

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

4069

expected_type=list, default=[])

4070

4071

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

4072

if vpir:

4073

stl = vpir.get('superTitleLink')

4074

if stl:

4075

stl = self._get_text(stl)

4076

if try_get(

4077

vpir,

4078

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

4079

info['location'] = stl

4080

else:

4081

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

4082

if mobj:

4083

info.update({

4084

'series': mobj.group(1),

4085

'season_number': int(mobj.group(2)),

4086

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

list) or []):

tbrs = variadic(

traverse_obj(

tlb, 'toggleButtonRenderer',

4095

('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),

4096

default=[]))

4097

for tbr in tbrs:

4098

for getter, regex in [(

4099

lambda x: x['defaultText']['accessibility']['accessibilityData'],

4100

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

4101

lambda x: x['accessibility'],

4102

lambda x: x['accessibilityData']['accessibilityData'],

4103

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

4104

label = (try_get(tbr, getter, dict) or {}).get('label')

4105

if label:

4106

mobj = re.match(regex, label)

4107

if mobj:

4108

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

4109

break

4110

sbr_tooltip = try_get(

4111

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

4112

if sbr_tooltip:

4113

like_count, dislike_count = sbr_tooltip.split(' / ')

4114

info.update({

4115

'like_count': str_to_int(like_count),

4116

'dislike_count': str_to_int(dislike_count),

4117

})

4118

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

4119

if vsir:

4120

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

4121

info.update({

4122

'channel': self._get_text(vor, 'title'),

4123

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

4128

list) or []

4129

multiple_songs = False

4130

for row in rows:

4131

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

4132

multiple_songs = True

4133

break

4134

for row in rows:

4135

mrr = row.get('metadataRowRenderer') or {}

4136

mrr_title = mrr.get('title')

4137

if not mrr_title:

4138

continue

4139

mrr_title = self._get_text(mrr, 'title')

4140

mrr_contents_text = self._get_text(mrr, ('contents', 0))

4141

if mrr_title == 'License':

4142

info['license'] = mrr_contents_text

4143

elif not multiple_songs:

4144

if mrr_title == 'Album':

4145

info['album'] = mrr_contents_text

4146

elif mrr_title == 'Artist':

4147

info['artist'] = mrr_contents_text

4148

elif mrr_title == 'Song':

4149

info['track'] = mrr_contents_text

4150

4151

fallbacks = {

4152

'channel': 'uploader',

4153

'channel_id': 'uploader_id',

4154

'channel_url': 'uploader_url',

4155

}

4156

4157

# The upload date for scheduled, live and past live streams / premieres in microformats

4158

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

4159

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

4160

upload_date = (

4161

unified_strdate(get_first(microformats, 'uploadDate'))

4162

or unified_strdate(search_meta('uploadDate')))

4163

if not upload_date or (

4164

live_status in ('not_live', None)

4165

and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])

4166

):

4167

upload_date = strftime_or_none(

4168

self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date

4169

info['upload_date'] = upload_date

4170

4171

for to, frm in fallbacks.items():

4172

if not info.get(to):

4173

info[to] = info.get(frm)

4174

4175

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))

4181

4182

is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)

4183

or get_first(video_details, 'isPrivate', expected_type=bool))

4184

4185

info['availability'] = (

4186

'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)

4187

else self._availability(

4188

is_private=is_private,

4189

needs_premium=(

4190

self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)

4191

or False if initial_data and is_private is not None else None),

4192

needs_subscription=(

4193

self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)

4194

or False if initial_data and is_private is not None else None),

4195

needs_auth=info['age_limit'] >= 18,

4196

is_unlisted=None if is_private is None else (

4197

self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)

4198

or get_first(microformats, 'isUnlisted', expected_type=bool))))

4199

4200

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

4201

4202

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

4208

4209

@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method of the form ``func(self, url, smuggled_data)``.

    The returned wrapper is called as ``wrapper(self, url)``. It unsmuggles the
    URL, sets ``smuggled_data['is_music_url'] = True`` when
    ``self.is_music_url(url)`` is truthy, and calls ``func`` with the clean URL
    and the smuggled data. When there is any smuggled data and the result has
    entries, the same data is smuggled into each entry's ``url`` lazily, as the
    entries are consumed.
    """

    def _propagate(entries, payload):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def decorated(self, url):
        clean_url, payload = unsmuggle_url(url, {})
        if self.is_music_url(clean_url):
            payload['is_music_url'] = True
        result = func(self, clean_url, payload)
        if payload and result.get('entries'):
            result['entries'] = _propagate(result['entries'], payload)
        return result

    return decorated

def _extract_channel_id(self, webpage):
    """Return the channel id advertised by ``webpage``.

    The ``channelId`` meta tag is used when present. Otherwise a channel URL is
    looked up from the URL-carrying meta tags listed below and the id is taken
    from its ``/channel/<id>`` path component.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to sit inside the capture group: with `([^/?#&])+`
    # the group only retains the last repetition, i.e. the final character
    # of the channel id instead of the whole id.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')

4241

4242

@staticmethod

4243

def _extract_basic_item_renderer(item):

4244

# Modified from _extract_grid_item_renderer

4245

known_basic_renderers = (

4246

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

4247

)

4248

for key, renderer in item.items():

4249

if not isinstance(renderer, dict):

4250

continue

4251

elif key in known_basic_renderers:

4252

return renderer

4253

elif key.startswith('grid') and key.endswith('Renderer'):

4254

return renderer

4255

4256

def _grid_entries(self, grid_renderer):

4257

for item in grid_renderer['items']:

4258

if not isinstance(item, dict):

4259

continue

4260

renderer = self._extract_basic_item_renderer(item)

4261

if not isinstance(renderer, dict):

4262

continue

4263

title = self._get_text(renderer, 'title')

4264

4265

# playlist

4266

playlist_id = renderer.get('playlistId')

4267

if playlist_id:

4268

yield self.url_result(

4269

'https://www.youtube.com/playlist?list=%s' % playlist_id,

4270

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

4275

if video_id:

4276

yield self._extract_video(renderer)

4277

continue

4278

# channel

4279

channel_id = renderer.get('channelId')

4280

if channel_id:

4281

yield self.url_result(

4282

'https://www.youtube.com/channel/%s' % channel_id,

4283

ie=YoutubeTabIE.ie_key(), video_title=title)

4284

continue

4285

# generic endpoint URL support

4286

ep_url = urljoin('https://www.youtube.com/', try_get(

4287

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

4288

str))

4289

if ep_url:

4290

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

4291

if ie.suitable(ep_url):

4292

yield self.url_result(

4293

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

4294

break

4295

4296

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for a music list item renderer.

    Checked in order: a video id under ``playlistItemData``; a playlist id
    under the watch endpoint (combined with the watch endpoint's video id when
    there is one); a browse id under the browse endpoint. Returns None when
    none of them is present.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={item_video_id}',
            ie=YoutubeIE.ie_key(), video_id=item_video_id)

    watch_path = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, watch_path + ('playlistId',))
    if playlist_id:
        watch_video_id = traverse_obj(renderer, watch_path + ('videoId',))
        if watch_video_id:
            target = f'https://music.youtube.com/watch?v={watch_video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)

4313

4314

def _shelf_entries_from_content(self, shelf_renderer):

4315

content = shelf_renderer.get('content')

4316

if not isinstance(content, dict):

4317

return

4318

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

4319

if renderer:

4320

# TODO: add support for nested playlists so each shelf is processed

4321

# as separate playlist

4322

# TODO: this includes only first N items

4323

yield from self._grid_entries(renderer)

4324

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield results for a shelf renderer.

    When the shelf has an endpoint URL, a URL result for it is yielded first,
    unless ``skip_channels`` is set and the URL contains ``/channels?``, in
    which case nothing at all is yielded. The entries embedded in the shelf
    content are yielded afterwards in every other case.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

4344

4345

def _playlist_entries(self, video_list_renderer):

4346

for content in video_list_renderer['contents']:

4347

if not isinstance(content, dict):

4348

continue

4349

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

4350

if not isinstance(renderer, dict):

4351

continue

4352

video_id = renderer.get('videoId')

4353

if not video_id:

4354

continue

4355

yield self._extract_video(renderer)

4356

4357

def _rich_entries(self, rich_grid_renderer):
    """Yield the extracted video of a rich item renderer, if it has one.

    The video is looked up as a ``videoRenderer`` or ``reelItemRenderer``
    under ``content``; nothing is yielded when it is missing or has no
    ``videoId``.
    """
    video_renderer = traverse_obj(
        rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4364

4365

def _video_entry(self, video_renderer):

4366

video_id = video_renderer.get('videoId')

4367

if video_id:

4368

return self._extract_video(video_renderer)

4369

4370

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a YoutubeTabIE URL result for a hashtag tile, or None if it has no tap URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4376

4377

def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every link in the post text that YoutubeIE can handle, except a link
    to the already-yielded attached video. Nothing is yielded when the thread
    has no ``backstagePostRenderer``.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_id = attached_video.get('videoId')
    if attached_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video was already yielded above
        if attached_id == linked_id:
            continue
        yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)

4412

4413

def _post_thread_continuation_entries(self, post_thread_continuation):

4414

contents = post_thread_continuation.get('contents')

4415

if not isinstance(contents, list):

4416

return

4417

for content in contents:

4418

renderer = content.get('backstagePostThreadRenderer')

4419

if isinstance(renderer, dict):

4420

yield from self._post_thread_entries(renderer)

4421

continue

4422

renderer = content.get('videoRenderer')

4423

if isinstance(renderer, dict):

4424

yield self._video_entry(renderer)

4425

4426

r''' # unused

4427

def _rich_grid_entries(self, contents):

4428

for content in contents:

4429

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4430

if video_renderer:

4431

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _report_history_entries(self, renderer):
    """Yield a YoutubeIE URL result for every link found in a report-history table renderer."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, link_path):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)

4442

4443

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    ``continuation_list`` is an output parameter: it is reset to ``[None]`` up
    front and its single slot is filled with the continuation extracted from
    the most specific renderer that provides one (item renderer, then section
    renderer, then ``parent_renderer`` itself).

    Contents holding an ``itemSectionRenderer``, ``musicShelfRenderer`` or
    ``musicShelfContinuation`` are expanded through the dispatch table below;
    ``richItemRenderer`` and ``reportHistorySectionRenderer`` contents are
    handled directly.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps the key of a section item to the callable producing its entries
    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each section item is processed
            for key, renderer in section_item.items():
                handler = section_handlers.get(key)
                if handler is None:
                    continue
                for entry in handler(renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4495

4496

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of ``tab``, following continuations page by page.

    The first page comes from the tab's ``sectionListRenderer`` or
    ``richGridRenderer``. Each following page is requested with the current
    continuation and dispatched, based on the keys of its first continuation
    item, to the matching handler in the table below. Iteration stops when
    there is no continuation, no response, or no known renderer in the page.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Looks up `continuation_list` at call time, so it always uses the
        # list most recently bound in this scope (it is rebound per page below)
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # key of the first continuation item -> (handler, key to wrap the items under, if any)
    continuation_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        first_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in first_item:
            if key not in continuation_handlers:
                continue
            handler, parent_key = continuation_handlers[key]
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            continuation_list = [None]
            yield from handler(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        if not video_items_renderer:
            break

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4561

for tab in tabs:

4562

renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}

4563

if renderer.get('selected') is True:

return renderer

else:

if fatal:

raise ExtractorError('Unable to find selected tab')

4568

4569

def _extract_uploader(self, data):
    """Return uploader fields taken from the playlist's secondary sidebar.

    The result holds ``uploader``, ``uploader_id`` and ``uploader_url`` as far
    as they are available, after passing through ``filter_dict``. For an owner
    text of the form "by X and N others", only X is used as the uploader.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return filter_dict({})

    owner_text = owner.get('text')
    return filter_dict({
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    })

4584

4585

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for a tab-based page (channel tab, playlist, hashtag...).

    Metadata comes from ``channelMetadataRenderer`` when present, otherwise
    from ``playlistMetadataRenderer``, plus the primary sidebar and header of
    ``data``. Entries come from the selected tab via ``_entries``. When no
    channel id is available from the metadata, the uploader fields are filled
    in from ``_extract_uploader`` instead; the ``channel*`` fields always
    mirror the final ``uploader*`` fields.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Only set for channel pages; None falls back to item_id further down
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    def _add_uncropped(thumbnails, thumbnail_id, preference):
        # Appends an uncropped variant derived from the first thumbnail, if any
        if not thumbnails:
            return
        uncropped_url = _get_uncropped(thumbnails[0]['url'])
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference,
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _add_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _add_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix = self._parse_time_text(self._get_text(playlist_stats, 2))
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    title = (
        title
        + format_field(selected_tab, 'title', ' - %s')
        + format_field(selected_tab, 'expandedText', ' - %s'))

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    for suffix in ('', '_id', '_url'):
        metadata['channel' + suffix] = metadata['uploader' + suffix]

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4676

4677

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline (watch-page) playlist, page by page.

    Each page is the list of videos in ``playlist``. Videos up to and
    including the last one already yielded are skipped, then the next page is
    requested from the ``next`` endpoint, anchored at the watch endpoint of
    the last playlist item. Iteration ends when a page is empty, contains
    nothing new, or the response carries no playlist.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last item may not be a playlistPanelVideoRenderer, in which case
        # try_get returns None; fall back to the defaults below
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint']) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No further playlist data; stop instead of failing on the next page
            return

4707

4708

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist found in a response.
    Returns a url_result for YoutubeTabIE when the playlist links to a page
    other than `url` and is not a known unviewable type; otherwise returns a
    playlist_result whose entries come from the inline playlist.
    @param item_id:  fallback playlist ID when `playlist` has no 'playlistId'
    @param url:      URL being extracted; base for resolving the playlist URL
    @param data:     response; fallback source of the title
    @param playlist: playlist dict
    @param ytcfg:    passed on to the inline playlist extraction
    """
    playlist_title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    tab_url = urljoin(url, relative_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    if not tab_url or tab_url == url or known_unviewable:
        # Nothing to delegate to (or delegating would hit a broken page)
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=playlist_title)

    return self.url_result(
        tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=playlist_title)

4730

4731

def _extract_availability(self, data):
    """
    Work out the availability of a given playlist/tab.
    Note: 'public' is only reported when the response says so explicitly
    (badge, header privacy or selected privacy icon); it is never assumed
    @param data: response
    """
    sidebar_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}

    header_privacy = traverse_obj(
        data, ('header', 'playlistHeaderRenderer', 'privacy'), expected_type=str)

    badges = self._extract_badges(sidebar_info)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    selected_icon = traverse_obj(
        sidebar_info, (
            'privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
            lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        get_all=False, expected_type=str)

    def tristate(badge_type, header_value, icon_value):
        # The badge is only consulted when the header privacy is present;
        # without it the selected icon decides, and with neither the answer is unknown (None)
        if header_privacy is not None:
            return self._has_badge(badges, badge_type) or header_privacy == header_value
        if selected_icon is not None:
            return selected_icon == icon_value
        return None

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or header_privacy == 'PUBLIC'
            or selected_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    return self._availability(
        is_private=tristate(BadgeType.AVAILABILITY_PRIVATE, 'PRIVATE', 'PRIVACY_PRIVATE'),
        is_unlisted=tristate(BadgeType.AVAILABILITY_UNLISTED, 'UNLISTED', 'PRIVACY_UNLISTED'),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value found among the playlist
    sidebar items of `data` that is an instance of `expected_type`, or None.
    @param data:          response
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the value must have to be accepted
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    # filter(None, ...) drops falsy candidates; next() stops at the first hit
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again so that unavailable videos are included.
    The browse ID and params are taken from the 'show unavailable videos'
    menu button when it exists; otherwise 'VL<item_id>' and 'wgYCCAA=' are used.
    Returns None without making a request when the primary sidebar info renderer is missing.
    @param item_id: playlist ID
    @param data:    response of the playlist
    @param ytcfg:   ytcfg used for the API headers and the request
    """
    sidebar_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar_info:
        return

    browse_id = params = None
    menu_entries = try_get(
        sidebar_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_entries:
        if not isinstance(entry, dict):
            continue
        nav_item = entry.get('menuNavigationItemRenderer')
        label = try_get(nav_item, lambda x: x['text']['simpleText'], str)
        if label and label.lower() == 'show unavailable videos':
            endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id, params = endpoint.get('browseId'), endpoint.get('params')
            # Only the first matching button is considered
            break

    api_headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    return self._extract_response(
        item_id=item_id, headers=api_headers,
        query={
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        },
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4813

4814

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' configuration argument of YoutubeTabIE"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped

4817

4818

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its yt initial data, with retries.
    Network errors other than HTTP 403/429 and incomplete initial data are
    handed to the retry manager; any other ExtractorError is reported through
    _error_or_warning and ends the attempts.
    @param url:     URL of the page to download
    @param item_id: ID used for the download and for extracting the initial data
    @param fatal:   passed to the retry manager, the data extraction and error reporting
    @returns        (webpage, data); either may still be None if the download failed
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            # Retry network failures, except HTTP 403 and 429 responses
            should_retry = isinstance(cause, network_exceptions) and not (
                isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429))
            if should_retry:
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            # Last statement of the loop body, so the retry manager takes over from here
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):

4847

"""Use if failed to extract ytcfg (and data) from initial webpage"""

4848

if not ytcfg and self.is_authenticated:

4849

msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'

4850

if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:

4851

raise ExtractorError(

4852

f'{msg}. If you are not downloading private content, or '

4853

'your cookies are only for the first account and channel,'

4854

' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',

4855

expected=True)

4856

self.report_warning(msg, only_once=True)

4857

4858

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data and ytcfg for `url`.
    Unless the webpage is skipped, both are taken from the downloaded webpage;
    when that yields no data, the data is fetched through the tab endpoint instead.
    @param ytcfg:          ytcfg to use; extracted from the webpage when falsy
    @param fatal:          whether a home page redirect or an API failure raises
    @param webpage_fatal:  `fatal` value for the webpage download
    @param default_client: client passed on to the tab endpoint request
    @returns               (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        # No usable webpage data: fall back to the API
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4877

4878

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API endpoint and fetch
    the response of the browse or watch endpoint it resolves to.
    Raises ExtractorError if nothing could be resolved and `fatal` is set;
    otherwise a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query), in order of preference
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)

4895

4896

# Default 'params' value for _search_results when the caller does not pass one;
# a falsy value (such as None) means no 'params' key is added to the search query
_SEARCH_PARAMS = None

4897

4898

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, requesting further pages for as long as a
    continuation is available.
    @param query:          search query
    @param params:         'params' value for the request; defaults to _SEARCH_PARAMS
    @param default_client: client used for the ytcfg download and the API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    payload = {'query': query}
    if params:
        payload['params'] = params

    # Paths at which the result contents may be found in a response
    content_paths = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_paths})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-element list that _extract_entries receives alongside each page;
    # its first item is merged into the next request
    continuation_list = [None]
    response = None
    for page_num in itertools.count(1):
        payload.update(continuation_list[0] or {})
        api_headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response), default_client=default_client)
        response = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=payload,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=api_headers)
        page_contents = traverse_obj(response, *content_paths)
        yield from self._extract_entries({'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            break

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4933

IE_DESC = 'YouTube Tabs'

4934

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4943

(?P<not_channel>

4944

feed/|hashtag/|

4945

(?:playlist|watch)\?.*?\blist=

4946

)|

4947

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4952

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4953

}

4954

IE_NAME = 'youtube:tab'

4955

4956

_TESTS = [{

4957

'note': 'playlists, multipage',

4958

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4959

'playlist_mincount': 94,

4960

'info_dict': {

4961

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4962

'title': 'Igor Kleiner - Playlists',

4963

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4964

'uploader': 'Igor Kleiner',

4965

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4966

'channel': 'Igor Kleiner',

4967

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4968

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4969

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4970

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4971

'channel_follower_count': int

4972

},

4973

}, {

4974

'note': 'playlists, multipage, different order',

4975

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4976

'playlist_mincount': 94,

4977

'info_dict': {

4978

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4979

'title': 'Igor Kleiner - Playlists',

4980

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4981

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4982

'uploader': 'Igor Kleiner',

4983

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4984

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4985

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4986

'channel': 'Igor Kleiner',

4987

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4988

'channel_follower_count': int

4989

},

4990

}, {

4991

'note': 'playlists, series',

4992

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4993

'playlist_mincount': 5,

4994

'info_dict': {

4995

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4996

'title': '3Blue1Brown - Playlists',

4997

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4998

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4999

'uploader': '3Blue1Brown',

5000

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5001

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5002

'channel': '3Blue1Brown',

5003

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5004

'tags': ['Mathematics'],

5005

'channel_follower_count': int

5006

},

5007

}, {

5008

'note': 'playlists, singlepage',

5009

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

5010

'playlist_mincount': 4,

5011

'info_dict': {

5012

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5013

'title': 'ThirstForScience - Playlists',

5014

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

5015

'uploader': 'ThirstForScience',

5016

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5017

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5018

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5019

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5020

'tags': 'count:13',

5021

'channel': 'ThirstForScience',

5022

'channel_follower_count': int

5023

}

5024

}, {

5025

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

5026

'only_matching': True,

5027

}, {

5028

'note': 'basic, single video playlist',

5029

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5030

'info_dict': {

5031

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5032

'uploader': 'Sergey M.',

5033

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5034

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

5039

'channel': 'Sergey M.',

5040

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5041

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5042

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5043

'availability': 'public',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

5048

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5049

'info_dict': {

5050

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5051

'uploader': 'Sergey M.',

5052

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5053

'title': 'youtube-dl empty playlist',

5054

'tags': [],

5055

'channel': 'Sergey M.',

5056

'description': '',

5057

'modified_date': '20160902',

5058

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5059

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5060

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5061

'availability': 'public',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

5067

'info_dict': {

5068

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5069

'title': 'lex will - Home',

5070

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5071

'uploader': 'lex will',

5072

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5073

'channel': 'lex will',

5074

'tags': ['bible', 'history', 'prophesy'],

5075

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5076

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5077

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5078

'channel_follower_count': int

5079

},

5080

'playlist_mincount': 2,

5081

}, {

5082

'note': 'Videos tab',

5083

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

5084

'info_dict': {

5085

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5086

'title': 'lex will - Videos',

5087

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5088

'uploader': 'lex will',

5089

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5090

'tags': ['bible', 'history', 'prophesy'],

5091

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5092

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5093

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5094

'channel': 'lex will',

5095

'channel_follower_count': int

5096

},

5097

'playlist_mincount': 975,

5098

}, {

5099

'note': 'Videos tab, sorted by popular',

5100

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

5101

'info_dict': {

5102

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5103

'title': 'lex will - Videos',

5104

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5105

'uploader': 'lex will',

5106

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5107

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5108

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5109

'channel': 'lex will',

5110

'tags': ['bible', 'history', 'prophesy'],

5111

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5112

'channel_follower_count': int

5113

},

5114

'playlist_mincount': 199,

5115

}, {

5116

'note': 'Playlists tab',

5117

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

5118

'info_dict': {

5119

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5120

'title': 'lex will - Playlists',

5121

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5122

'uploader': 'lex will',

5123

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5124

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5125

'channel': 'lex will',

5126

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5127

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5128

'tags': ['bible', 'history', 'prophesy'],

5129

'channel_follower_count': int

5130

},

5131

'playlist_mincount': 17,

5132

}, {

5133

'note': 'Community tab',

5134

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

5135

'info_dict': {

5136

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5137

'title': 'lex will - Community',

5138

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5139

'uploader': 'lex will',

5140

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5141

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5142

'channel': 'lex will',

5143

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5144

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5145

'tags': ['bible', 'history', 'prophesy'],

5146

'channel_follower_count': int

5147

},

5148

'playlist_mincount': 18,

5149

}, {

5150

'note': 'Channels tab',

5151

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

5152

'info_dict': {

5153

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5154

'title': 'lex will - Channels',

5155

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5156

'uploader': 'lex will',

5157

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5158

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5159

'channel': 'lex will',

5160

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5161

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5162

'tags': ['bible', 'history', 'prophesy'],

5163

'channel_follower_count': int

5164

},

5165

'playlist_mincount': 12,

5166

}, {

5167

'note': 'Search tab',

5168

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

5169

'playlist_mincount': 40,

5170

'info_dict': {

5171

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5172

'title': '3Blue1Brown - Search - linear algebra',

5173

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5174

'uploader': '3Blue1Brown',

5175

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5176

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5177

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5178

'tags': ['Mathematics'],

5179

'channel': '3Blue1Brown',

5180

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5181

'channel_follower_count': int

5182

},

5183

}, {

5184

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5185

'only_matching': True,

5186

}, {

5187

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5188

'only_matching': True,

5189

}, {

5190

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5191

'only_matching': True,

5192

}, {

5193

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

5194

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5195

'info_dict': {

5196

'title': '29C3: Not my department',

5197

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5198

'uploader': 'Christiaan008',

5199

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5200

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

5201

'tags': [],

5202

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5203

'view_count': int,

5204

'modified_date': '20150605',

5205

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5206

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5207

'channel': 'Christiaan008',

5208

'availability': 'public',

5209

},

5210

'playlist_count': 96,

5211

}, {

5212

'note': 'Large playlist',

5213

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

5214

'info_dict': {

5215

'title': 'Uploads from Cauchemar',

5216

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

5217

'uploader': 'Cauchemar',

5218

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5219

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

5220

'tags': [],

5221

'modified_date': r're:\d{8}',

5222

'channel': 'Cauchemar',

5223

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

5224

'view_count': int,

5225

'description': '',

5226

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5227

'availability': 'public',

5228

},

5229

'playlist_mincount': 1123,

5230

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5231

}, {

5232

'note': 'even larger playlist, 8832 videos',

5233

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

5234

'only_matching': True,

5235

}, {

5236

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

5237

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

5238

'info_dict': {

5239

'title': 'Uploads from Interstellar Movie',

5240

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

5241

'uploader': 'Interstellar Movie',

5242

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5243

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

5244

'tags': [],

5245

'view_count': int,

5246

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5247

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

5248

'channel': 'Interstellar Movie',

5249

'description': '',

5250

'modified_date': r're:\d{8}',

5251

'availability': 'public',

5252

},

5253

'playlist_mincount': 21,

5254

}, {

5255

'note': 'Playlist with "show unavailable videos" button',

5256

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

5257

'info_dict': {

5258

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

5259

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

5260

'uploader': 'Phim Siêu Nhân Nhật Bản',

5261

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5262

'view_count': int,

5263

'channel': 'Phim Siêu Nhân Nhật Bản',

5264

'tags': [],

5265

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5266

'description': '',

5267

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5268

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5269

'modified_date': r're:\d{8}',

5270

'availability': 'public',

5271

},

5272

'playlist_mincount': 200,

5273

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5274

}, {

5275

'note': 'Playlist with unavailable videos in page 7',

5276

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

5277

'info_dict': {

5278

'title': 'Uploads from BlankTV',

5279

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

5280

'uploader': 'BlankTV',

5281

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5282

'channel': 'BlankTV',

5283

'channel_url': 'https://www.youtube.com/c/blanktv',

5284

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5285

'view_count': int,

5286

'tags': [],

5287

'uploader_url': 'https://www.youtube.com/c/blanktv',

5288

'modified_date': r're:\d{8}',

5289

'description': '',

5290

'availability': 'public',

5291

},

5292

'playlist_mincount': 1000,

5293

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5294

}, {

5295

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

5296

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5297

'info_dict': {

5298

'title': 'Data Analysis with Dr Mike Pound',

5299

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5300

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5301

'uploader': 'Computerphile',

5302

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

5303

'uploader_url': 'https://www.youtube.com/user/Computerphile',

5304

'tags': [],

5305

'view_count': int,

5306

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5307

'channel_url': 'https://www.youtube.com/user/Computerphile',

5308

'channel': 'Computerphile',

5309

'availability': 'public',

5310

},

5311

'playlist_mincount': 11,

5312

}, {

5313

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5314

'only_matching': True,

5315

}, {

5316

'note': 'Playlist URL that does not actually serve a playlist',

5317

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

5322

'uploader': 'STREEM',

5323

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

5324

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

5325

'upload_date': '20150526',

5326

'license': 'Standard YouTube License',

5327

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

5328

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5335

},

5336

'skip': 'This video is not available.',

5337

'add_ie': [YoutubeIE.ie_key()],

5338

}, {

5339

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5340

'only_matching': True,

5341

}, {

5342

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5343

'only_matching': True,

5344

}, {

5345

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5346

'info_dict': {

5347

'id': 'Wq15eF5vCbI', # This will keep changing

5348

'ext': 'mp4',

5349

'title': str,

5350

'uploader': 'Sky News',

5351

'uploader_id': 'skynews',

5352

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5353

'upload_date': r're:\d{8}',

5354

'description': str,

5355

'categories': ['News & Politics'],

5356

'tags': list,

5357

'like_count': int,

5358

'release_timestamp': 1642502819,

5359

'channel': 'Sky News',

5360

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5361

'age_limit': 0,

5362

'view_count': int,

5363

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

5364

'playable_in_embed': True,

5365

'release_date': '20220118',

5366

'availability': 'public',

5367

'live_status': 'is_live',

5368

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5369

'channel_follower_count': int

5370

},

5371

'params': {

5372

'skip_download': True,

5373

},

5374

'expected_warnings': ['Ignoring subtitle tracks found in '],

5375

}, {

5376

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5381

'uploader': 'The Young Turks',

5382

'uploader_id': 'TheYoungTurks',

5383

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5384

'upload_date': '20150715',

5385

'license': 'Standard YouTube License',

5386

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5387

'categories': ['News & Politics'],

5388

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5393

},

5394

'only_matching': True,

5395

}, {

5396

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5397

'only_matching': True,

5398

}, {

5399

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5400

'only_matching': True,

5401

}, {

5402

'note': 'A channel that is not live. Should raise error',

5403

'url': 'https://www.youtube.com/user/numberphile/live',

5404

'only_matching': True,

5405

}, {

5406

'url': 'https://www.youtube.com/feed/trending',

5407

'only_matching': True,

5408

}, {

5409

'url': 'https://www.youtube.com/feed/library',

5410

'only_matching': True,

5411

}, {

5412

'url': 'https://www.youtube.com/feed/history',

5413

'only_matching': True,

5414

}, {

5415

'url': 'https://www.youtube.com/feed/subscriptions',

5416

'only_matching': True,

5417

}, {

5418

'url': 'https://www.youtube.com/feed/watch_later',

5419

'only_matching': True,

5420

}, {

5421

'note': 'Recommended - redirects to home page.',

5422

'url': 'https://www.youtube.com/feed/recommended',

5423

'only_matching': True,

5424

}, {

5425

'note': 'inline playlist with not always working continuations',

5426

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5427

'only_matching': True,

5428

}, {

5429

'url': 'https://www.youtube.com/course',

5430

'only_matching': True,

5431

}, {

5432

'url': 'https://www.youtube.com/zsecurity',

5433

'only_matching': True,

5434

}, {

5435

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5436

'only_matching': True,

5437

}, {

5438

'url': 'https://www.youtube.com/TheYoungTurks/live',

5439

'only_matching': True,

5440

}, {

5441

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5448

}, {

5449

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5450

'only_matching': True,

5451

}, {

5452

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5453

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5454

'only_matching': True

5455

}, {

5456

'note': '/browse/ should redirect to /channel/',

5457

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5458

'only_matching': True

5459

}, {

5460

'note': 'VLPL, should redirect to playlist?list=PL...',

5461

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5462

'info_dict': {

5463

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5464

'uploader': 'NoCopyrightSounds',

5465

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5466

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5467

'title': 'NCS : All Releases 💿',

5468

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5469

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5470

'modified_date': r're:\d{8}',

5471

'view_count': int,

5472

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5473

'tags': [],

5474

'channel': 'NoCopyrightSounds',

5475

'availability': 'public',

5476

},

5477

'playlist_mincount': 166,

5478

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5479

}, {

5480

'note': 'Topic, should redirect to playlist?list=UU...',

5481

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5482

'info_dict': {

5483

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5484

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5485

'title': 'Uploads from Royalty Free Music - Topic',

5486

'uploader': 'Royalty Free Music - Topic',

5487

'tags': [],

5488

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5489

'channel': 'Royalty Free Music - Topic',

5490

'view_count': int,

5491

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5492

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5493

'modified_date': r're:\d{8}',

5494

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5495

'description': '',

5496

'availability': 'public',

5497

},

5498

'expected_warnings': [

5499

'The URL does not have a videos tab',

5500

r'[Uu]navailable videos (are|will be) hidden',

5501

],

5502

'playlist_mincount': 101,

5503

}, {

5504

'note': 'Topic without a UU playlist',

5505

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5506

'info_dict': {

5507

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5508

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5509

'tags': [],

5510

},

5511

'expected_warnings': [

5512

'the playlist redirect gave error',

5513

],

5514

'playlist_mincount': 9,

5515

}, {

5516

'note': 'Youtube music Album',

5517

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5518

'info_dict': {

5519

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5520

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5525

'modified_date': r're:\d{8}',

5526

},

5527

'playlist_count': 50,

5528

}, {

5529

'note': 'unlisted single video playlist',

5530

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5531

'info_dict': {

5532

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5533

'uploader': 'colethedj',

5534

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5535

'title': 'yt-dlp unlisted playlist test',

5536

'availability': 'unlisted',

5537

'tags': [],

5538

'modified_date': '20220418',

5539

'channel': 'colethedj',

5540

'view_count': int,

5541

'description': '',

5542

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5543

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5544

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5549

'url': 'https://www.youtube.com/feed/recommended',

5550

'info_dict': {

5551

'id': 'recommended',

5552

'title': 'recommended',

5553

'tags': [],

5554

},

5555

'playlist_mincount': 50,

5556

'params': {

5557

'skip_download': True,

5558

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5559

},

5560

}, {

5561

'note': 'API Fallback: /videos tab, sorted by oldest first',

5562

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5563

'info_dict': {

5564

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5565

'title': 'Cody\'sLab - Videos',

5566

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5567

'uploader': 'Cody\'sLab',

5568

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5569

'channel': 'Cody\'sLab',

5570

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5571

'tags': [],

5572

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5573

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5574

'channel_follower_count': int

5575

},

5576

'playlist_mincount': 650,

5577

'params': {

5578

'skip_download': True,

5579

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5580

},

5581

}, {

5582

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5583

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5584

'info_dict': {

5585

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5586

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5587

'title': 'Uploads from Royalty Free Music - Topic',

5588

'uploader': 'Royalty Free Music - Topic',

5589

'modified_date': r're:\d{8}',

5590

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5591

'description': '',

5592

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5593

'tags': [],

5594

'channel': 'Royalty Free Music - Topic',

5595

'view_count': int,

5596

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5597

'availability': 'public',

5598

},

5599

'expected_warnings': [

5600

'does not have a videos tab',

5601

r'[Uu]navailable videos (are|will be) hidden',

5602

],

5603

'playlist_mincount': 101,

5604

'params': {

5605

'skip_download': True,

5606

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5607

},

5608

}, {

5609

'note': 'non-standard redirect to regional channel',

5610

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5611

'only_matching': True

5612

}, {

5613

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5614

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5615

'info_dict': {

5616

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5617

'modified_date': '20220407',

5618

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5619

'tags': [],

5620

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5621

'uploader': 'pukkandan',

5622

'availability': 'unlisted',

5623

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5624

'channel': 'pukkandan',

5625

'description': 'Test for collaborative playlist',

5626

'title': 'yt-dlp test - collaborative playlist',

5627

'view_count': int,

5628

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5629

},

5630

'playlist_mincount': 2

5631

}, {

5632

'note': 'translated tab name',

5633

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',

5634

'info_dict': {

5635

'id': 'UCiu-3thuViMebBjw_5nWYrA',

5636

'tags': [],

5637

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5638

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5639

'description': '',

5640

'title': 'cole-dlp-test-acc - 再生リスト',

5641

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5642

'uploader': 'cole-dlp-test-acc',

5643

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5644

'channel': 'cole-dlp-test-acc',

5645

},

5646

'playlist_mincount': 1,

5647

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5648

'expected_warnings': ['Preferring "ja"'],

5649

}, {

5650

# XXX: this should really check flat playlist entries, but the test suite doesn't support that

5651

'note': 'preferred lang set with playlist with translated video titles',

5652

'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5653

'info_dict': {

5654

'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5655

'tags': [],

5656

'view_count': int,

5657

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5658

'uploader': 'cole-dlp-test-acc',

5659

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5660

'channel': 'cole-dlp-test-acc',

5661

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5662

'description': 'test',

5663

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5664

'title': 'dlp test playlist',

5665

'availability': 'public',

5666

},

5667

'playlist_mincount': 1,

5668

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5669

'expected_warnings': ['Preferring "ja"'],

5670

}, {

5671

# shorts audio pivot for 2GtVksBMYFM.

5672

'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',

5673

'info_dict': {

5674

'id': 'sfv_audio_pivot',

5675

'title': 'sfv_audio_pivot',

5676

'tags': [],

5677

},

5678

'playlist_mincount': 50,

}]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE claims is rejected here; for every other URL the
    decision is delegated to the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5685

5686

# Splits a tab URL into three named parts: `pre` (whatever _VALID_URL matches),
# an optional `tab` path segment such as "/videos", and `post` (the remainder).
# The conditional group `(?(not_channel)|...)` only attempts to match a tab
# when the `not_channel` group (supplied by _VALID_URL) did not take part in
# the match.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5687

5688

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab-style URL and hand it to the matching extraction routine.

    Steps, in order:
      1. Force the host to www.youtube.com and split the URL with _URL_RE.
      2. For URLs smuggled as music URLs, map VL.../MP.../browse ids to the
         equivalent playlist, canonical album URL or /channel/ URL.
      3. Append "/videos" to bare channel URLs (unless the
         'no-youtube-channel-redirect' compat option is set).
      4. Handle watch URLs that carry a playlist id and/or a video id.
      5. Download the page data, follow a regional redirect if one is given,
         and reconcile the requested tab with the tab that was actually
         selected (falling back to the UU... playlist for UC... ids).
      6. Dispatch on the shape of the data: tabs, watch-page playlist, or a
         single video.

    Raises ExtractorError when the page cannot be recognised, and UserNotLive
    when a /live tab was requested but a tabbed page came back.
    """
    item_id = self._match_id(url)
    parsed_url = urllib.parse.urlparse(url)
    url = urllib.parse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(candidate_url):
        # Unmatched groups are reported as None; turn them into '' so the
        # string handling below does not need to special-case them
        groups = self._URL_RE.match(candidate_url).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    mobj = get_mobj(url)
    redirect_warning = None
    pre = mobj['pre']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':  # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab = ''
            post = ''
            is_channel = False
        elif id_prefix == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user asked for before a default tab is filled in
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = pre + tab + post
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_path = (
        'onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
        'commandMetadata', 'webCommandMetadata', 'url')
    redirect_url = traverse_obj(data, redirect_path, get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and trailing part on the redirect target
        redirect_url = urljoin('https://www.youtube.com', redirect_url) + mobj['tab'] + mobj['post']
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_url = urljoin(
            url, traverse_obj(selected_tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
        translated_tab_name = selected_tab.get('title', '').lower()

        # Prefer tab name from tab url as it is always in en,
        # but only when preferred lang is set as it may not extract reliably in all cases.
        selected_tab_name = None
        if self._preferred_lang in (None, 'en'):
            selected_tab_name = translated_tab_name
        if not selected_tab_name and selected_tab_url:
            selected_tab_name = get_mobj(selected_tab_url)['tab'][1:]  # primary
        if not selected_tab_name:
            selected_tab_name = translated_tab_name

        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]

        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The user explicitly asked for this tab, so a silent
                    # substitution is not acceptable
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id = pl_id
                        url = pl_url
                        selected_tab_name = requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so the tabs are looked up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return self.url_result(
        f'https://www.youtube.com/watch?v={video_id}',
        ie=YoutubeIE.ie_key(), video_id=video_id)

5826

5827

5828

class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and playlist-carrying URLs, then delegate to YoutubeTabIE.

    The extractor does no downloading itself: it rewrites its input to a
    https://www.youtube.com/playlist URL and returns a url_result for
    YoutubeTabIE.
    """
    IE_DESC = 'YouTube playlists'
    # Verbose regex: an optional "<host>/...?...list=" prefix followed by the playlist id
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return True only for URLs this extractor should claim.

        URLs that YoutubeTabIE already accepts, and URLs carrying a non-empty
        `v` query parameter, are rejected; everything else is decided by the
        parent class.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs comes from the module-level `..utils` import, the same
        # name _real_extract uses; no function-scope re-import is needed
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* to a canonical playlist URL and defer to YoutubeTabIE.

        The original query string is carried over when there is one;
        otherwise only `list=<playlist id>` is used. Music URLs are flagged
        through smuggled data so the information survives the rewrite.
        """
        playlist_id = self._match_id(url)
        # Must be evaluated on the incoming URL, before it is replaced below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5939

5940

5941

class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a `list=` playlist id.

    The link is rewritten to a full watch URL containing both the video id
    and the playlist id and then passed on to YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
    }
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (v + list + feature) and defer to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5990

5991

5992

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map /embed/live_stream?channel=<id> URLs onto the channel's /live tab."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the channel's /live page, handled by YoutubeTabIE."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

6005

6006

6007

class YoutubeYtUserIE(InfoExtractor):
    """Expand the "ytuser:<name>" shorthand into the user's /videos URL."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the user's videos page, handled by YoutubeTabIE."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

6021

6022

6023

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Expand the ":ytfav" keyword (and its spelling variants) to the "LL" playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for playlist "LL", handled by YoutubeTabIE."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())

6040

6041

6042

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Extract the entries of the notification menu as a playlist (":ytnotif" keyword)."""
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in *response*.

        *continuation_list* is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last
        `continuationItemRenderer` seen, if any. Because this is a generator,
        the slot is only updated as the caller iterates.
        """
        # The first page nests the items in a popup; later pages append them
        first_page_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer',
            'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
        next_page_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        notification_list = traverse_obj(
            response, first_page_path, next_page_path, expected_type=list) or []

        continuation_list[0] = None
        for item in notification_list:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            continuation = item.get('continuationItemRenderer')
            if continuation:
                continuation_list[0] = continuation

    def _extract_notification_renderer(self, notification):
        """Turn a single notification renderer into a `url` entry.

        Notifications with a watch endpoint point at the video. Otherwise the
        browse endpoint must provide both a channel id and a post id, and the
        entry points at the channel's community post. Returns None when
        neither form can be built.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            canonical_base_url = traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', canonical_base_url, 'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title_pattern = rf'{re.escape(channel or "")}[^:]+: (.+)'
        title = self._search_regex(title_pattern, notification_title, 'video title', default=None)

        # The date is derived from the "sent time" text, so it is only
        # reported when the approximate_date extractor argument is given
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            sent_time = self._parse_time_text(self._get_text(notification, 'sentTimeText'))
            upload_date = strftime_or_none(sent_time, '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from every page of the notification menu.

        Pages are requested until a page leaves no continuation behind. The
        visitor data sent with each request is taken from the previous
        response (None for the first request).
        """
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a playlist whose id and title are both 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)

6131

6132

6133

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the "ytsearch" key.

    Only declarative configuration lives here; no methods are defined in
    this class.
    """
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the "ytsearchdate" key (newest videos first).

    Only declarative configuration lives here; no methods are defined in
    this class.
    """
    # Derived from the plain search extractor's name so the two stay in sync
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle youtube.com /results and /search URLs.

    The search terms are read from `search_query` (or `q`) and the optional
    `sp` parameter is passed along to the search.
    """
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search described by *url*; the query is used as playlist id and title."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q` when both are present
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, search_params), query, query)

6202

6203

6204

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com /search URLs.

    A result section can be selected either through the `sp` query parameter
    or through a URL fragment naming one of the keys of _SECTIONS.
    """
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment, lower-cased) -> `sp` value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run the music search described by *url*.

        The playlist id and title are "<query> - <section>" when a section is
        known and just "<query>" otherwise. An `sp` value that matches no
        entry of _SECTIONS is itself used as the section label.
        """
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Reverse lookup: label the search with the section name whose
            # `sp` value was given, or with the raw value if it is unknown
            section = params
            for section_name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = section_name
                    break
        else:
            # No `sp`: take the text after the first '#' as the section name
            url_pieces = url.split('#')
            fragment = url_pieces[1] if len(url_pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

6255

6256

6257

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    Extraction hands https://www.youtube.com/feed/<_FEED_NAME> over to
    YoutubeTabIE, so subclasses must override the _FEED_NAME property.
    """
    _FEED_NAME = 'feeds'
    # Presumably consulted by _check_login_required - defined outside this class
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(self):
        return f'youtube:{self._FEED_NAME}'

    def _real_initialize(self):
        # Reuse the login check of the YouTube base extractor on this instance
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the feed page URL for ``_FEED_NAME``."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

6275

6276

6277

class YoutubeWatchLaterIE(InfoExtractor):
    # Maps the ":ytwatchlater" keyword onto the "WL" playlist URL
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the watch-later (``list=WL``) playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

6289

6290

6291

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "recommended" feed; all extraction logic is inherited
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the True default set on YoutubeFeedsInfoExtractor
    _LOGIN_REQUIRED = False
    # Matches the bare youtube.com homepage as well as the :ytrec / :ytrecommended keywords
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "subscriptions" feed; all extraction logic is inherited
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "history" feed; all extraction logic is inherited
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    # Accepts both the short :ythis and the long :ythistory keyword
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeStoriesIE(InfoExtractor):
    # Resolves "ytstories:<channel id>" to an "RLTD..." playlist handled by YoutubeTabIE
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    # The captured id is the channel id without its leading "UC"
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the stories playlist URL of the matched channel.

        The playlist id is ``RLTD`` followed by the id captured by
        ``_VALID_URL``; ``{'is_story': True}`` is smuggled into the URL for
        the receiving extractor.
        """
        playlist_id = f'RLTD{self._match_id(url)}'
        story_url = smuggle_url(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)

6345

6346

6347

class YoutubeShortsAudioPivotIE(InfoExtractor):
    # Resolves /source/<video id>/shorts URLs to the sfv_audio_pivot feed
    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the URL-quoted, base64-encoded ``bp`` value for the
        sfv_audio_pivot feed of ``video_id``
        """
        encoded_id = video_id.encode()
        # The binary template embeds the video id three times
        # (presumably a serialized protobuf message, going by the original `pb_params` name)
        raw_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (encoded_id, encoded_id, encoded_id)
        b64_params = base64.b64encode(raw_params).decode()
        return urllib.parse.quote(b64_params)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the audio-pivot feed URL for the matched video id."""
        video_id = self._match_id(url)
        pivot_url = f'https://www.youtube.com/feed/sfv_audio_pivot?bp={self._generate_audio_pivot_params(video_id)}'
        return self.url_result(pivot_url, ie=YoutubeTabIE)

class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that end right after a single non-video
    # parameter - typically the result of an unquoted "&" on the shell command
    # line - and reports a helpful error instead of attempting extraction.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with a hint about quoting the URL.

        Raises:
            ExtractorError: unconditionally, flagged ``expected=True``.
        """
        # The example commands name this project's own executable (yt-dlp);
        # the message previously pointed users at youtube-dl.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    # Resolves a /clip/<id> URL to its base video plus the clip's start/end offsets
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the clip's base video.

        The clip page data supplies the id of the underlying video and the
        clip boundaries, which are reported in seconds as ``section_start``
        and ``section_end``.

        Raises:
            ExtractorError: if the page data contains neither a video id nor
                the clip's loop command.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First `loopCommand` found along this path; it carries startTimeMs/endTimeMs
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        if not clip_data:
            # Without this guard a missing path surfaces as a TypeError on the
            # subscripts below rather than as an extractor error
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Offsets are given in milliseconds; report them in seconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }

class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose `v` value is only 1-10 characters long and
    # reports them as truncated instead of attempting extraction.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail, naming the incomplete id found in ``url``.

        Raises:
            ExtractorError: unconditionally, flagged ``expected=True``.
        """
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)