jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import enum
	6	import hashlib
	7	import itertools
	8	import json
	9	import math
	10	import os.path
	11	import random
	12	import re
	13	import sys
	14	import threading
	15	import time
	16	import traceback
	17	import urllib.error
	18	import urllib.parse
	19
	20	from .common import InfoExtractor, SearchInfoExtractor
	21	from .openload import PhantomJSwrapper
	22	from ..compat import functools
	23	from ..jsinterp import JSInterpreter
	24	from ..utils import (
	25	NO_DEFAULT,
	26	ExtractorError,
	27	LazyList,
	28	UserNotLive,
	29	bug_reports_message,
	30	classproperty,
	31	clean_html,
	32	datetime_from_str,
	33	dict_get,
	34	filter_dict,
	35	float_or_none,
	36	format_field,
	37	get_first,
	38	int_or_none,
	39	is_html,
	40	join_nonempty,
	41	js_to_json,
	42	mimetype2ext,
	43	network_exceptions,
	44	orderedSet,
	45	parse_codecs,
	46	parse_count,
	47	parse_duration,
	48	parse_iso8601,
	49	parse_qs,
	50	qualities,
	51	remove_start,
	52	smuggle_url,
	53	str_or_none,
	54	str_to_int,
	55	strftime_or_none,
	56	traverse_obj,
	57	try_get,
	58	unescapeHTML,
	59	unified_strdate,
	60	unified_timestamp,
	61	unsmuggle_url,
	62	update_url_query,
	63	url_or_none,
	64	urljoin,
	65	variadic,
	66	)
	67
	68	# any clients starting with _ cannot be explicitly requested by the user
	69	INNERTUBE_CLIENTS = {
	70	'web': {
	71	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	72	'INNERTUBE_CONTEXT': {
	73	'client': {
	74	'clientName': 'WEB',
	75	'clientVersion': '2.20220801.00.00',
	76	}
	77	},
	78	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	79	},
	80	'web_embedded': {
	81	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	82	'INNERTUBE_CONTEXT': {
	83	'client': {
	84	'clientName': 'WEB_EMBEDDED_PLAYER',
	85	'clientVersion': '1.20220731.00.00',
	86	},
	87	},
	88	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	89	},
	90	'web_music': {
	91	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	92	'INNERTUBE_HOST': 'music.youtube.com',
	93	'INNERTUBE_CONTEXT': {
	94	'client': {
	95	'clientName': 'WEB_REMIX',
	96	'clientVersion': '1.20220727.01.00',
	97	}
	98	},
	99	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	100	},
	101	'web_creator': {
	102	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	103	'INNERTUBE_CONTEXT': {
	104	'client': {
	105	'clientName': 'WEB_CREATOR',
	106	'clientVersion': '1.20220726.00.00',
	107	}
	108	},
	109	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	110	},
	111	'android': {
	112	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	113	'INNERTUBE_CONTEXT': {
	114	'client': {
	115	'clientName': 'ANDROID',
	116	'clientVersion': '17.31.35',
	117	'androidSdkVersion': 30,
	118	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	119	}
	120	},
	121	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	122	'REQUIRE_JS_PLAYER': False
	123	},
	124	'android_embedded': {
	125	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	126	'INNERTUBE_CONTEXT': {
	127	'client': {
	128	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	129	'clientVersion': '17.31.35',
	130	'androidSdkVersion': 30,
	131	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	132	},
	133	},
	134	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	135	'REQUIRE_JS_PLAYER': False
	136	},
	137	'android_music': {
	138	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	139	'INNERTUBE_CONTEXT': {
	140	'client': {
	141	'clientName': 'ANDROID_MUSIC',
	142	'clientVersion': '5.16.51',
	143	'androidSdkVersion': 30,
	144	'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
	145	}
	146	},
	147	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	148	'REQUIRE_JS_PLAYER': False
	149	},
	150	'android_creator': {
	151	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	152	'INNERTUBE_CONTEXT': {
	153	'client': {
	154	'clientName': 'ANDROID_CREATOR',
	155	'clientVersion': '22.30.100',
	156	'androidSdkVersion': 30,
	157	'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
	158	},
	159	},
	160	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	161	'REQUIRE_JS_PLAYER': False
	162	},
	163	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	164	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	165	'ios': {
	166	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	167	'INNERTUBE_CONTEXT': {
	168	'client': {
	169	'clientName': 'IOS',
	170	'clientVersion': '17.33.2',
	171	'deviceModel': 'iPhone14,3',
	172	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	173	}
	174	},
	175	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	176	'REQUIRE_JS_PLAYER': False
	177	},
	178	'ios_embedded': {
	179	'INNERTUBE_CONTEXT': {
	180	'client': {
	181	'clientName': 'IOS_MESSAGES_EXTENSION',
	182	'clientVersion': '17.33.2',
	183	'deviceModel': 'iPhone14,3',
	184	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	185	},
	186	},
	187	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	188	'REQUIRE_JS_PLAYER': False
	189	},
	190	'ios_music': {
	191	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	192	'INNERTUBE_CONTEXT': {
	193	'client': {
	194	'clientName': 'IOS_MUSIC',
	195	'clientVersion': '5.21',
	196	'deviceModel': 'iPhone14,3',
	197	'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	198	},
	199	},
	200	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	201	'REQUIRE_JS_PLAYER': False
	202	},
	203	'ios_creator': {
	204	'INNERTUBE_CONTEXT': {
	205	'client': {
	206	'clientName': 'IOS_CREATOR',
	207	'clientVersion': '22.33.101',
	208	'deviceModel': 'iPhone14,3',
	209	'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	210	},
	211	},
	212	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	213	'REQUIRE_JS_PLAYER': False
	214	},
	215	# mweb has 'ultralow' formats
	216	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	217	'mweb': {
	218	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	219	'INNERTUBE_CONTEXT': {
	220	'client': {
	221	'clientName': 'MWEB',
	222	'clientVersion': '2.20220801.00.00',
	223	}
	224	},
	225	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	226	},
	227	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	228	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	229	'tv_embedded': {
	230	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	231	'INNERTUBE_CONTEXT': {
	232	'client': {
	233	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	234	'clientVersion': '2.0',
	235	},
	236	},
	237	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	238	},
	239	}
	240
	241
	242	def _split_innertube_client(client_name):
	243	variant, *base = client_name.rsplit('.', 1)
	244	if base:
	245	return variant, base[0], variant
	246	base, *variant = client_name.split('_', 1)
	247	return client_name, base, variant[0] if variant else None
	248
	249
	250	def build_innertube_clients():
	251	THIRD_PARTY = {
	252	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	253	}
	254	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	255	priority = qualities(BASE_CLIENTS[::-1])
	256
	257	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	258	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	259	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	260	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	261	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	262
	263	_, base_client, variant = _split_innertube_client(client)
	264	ytcfg['priority'] = 10 * priority(base_client)
	265
	266	if not variant:
	267	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	268	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	269	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	270	embedscreen['priority'] -= 3
	271	elif variant == 'embedded':
	272	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	273	ytcfg['priority'] -= 2
	274	else:
	275	ytcfg['priority'] -= 3
	276
	277
	278	build_innertube_clients()
	279
	280
	281	class BadgeType(enum.Enum):
	282	AVAILABILITY_UNLISTED = enum.auto()
	283	AVAILABILITY_PRIVATE = enum.auto()
	284	AVAILABILITY_PUBLIC = enum.auto()
	285	AVAILABILITY_PREMIUM = enum.auto()
	286	AVAILABILITY_SUBSCRIPTION = enum.auto()
	287	LIVE_NOW = enum.auto()
	288
	289
	290	class YoutubeBaseInfoExtractor(InfoExtractor):
	291	"""Provide base functions for Youtube extractors"""
	292
	293	_RESERVED_NAMES = (
	294	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	295	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	296	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|source\|'
	297	r'storefront\|oops\|index\|account\|t/terms\|about\|upload\|signin\|logout')
	298
	299	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	300
	301	# _NETRC_MACHINE = 'youtube'
	302
	303	# If True it will raise an error if no login info is provided
	304	_LOGIN_REQUIRED = False
	305
	306	_INVIDIOUS_SITES = (
	307	# invidious-redirect websites
	308	r'(?:www\.)?redirect\.invidious\.io',
	309	r'(?:(?:www\|dev)\.)?invidio\.us',
	310	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	311	r'(?:www\.)?invidious\.pussthecat\.org',
	312	r'(?:www\.)?invidious\.zee\.li',
	313	r'(?:www\.)?invidious\.ethibox\.fr',
	314	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	315	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	316	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	317	# youtube-dl invidious instances list
	318	r'(?:(?:www\|no)\.)?invidiou\.sh',
	319	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	320	r'(?:www\.)?invidious\.kabi\.tk',
	321	r'(?:www\.)?invidious\.mastodon\.host',
	322	r'(?:www\.)?invidious\.zapashcanon\.fr',
	323	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	324	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	325	r'(?:www\.)?invidious\.himiko\.cloud',
	326	r'(?:www\.)?invidious\.reallyancient\.tech',
	327	r'(?:www\.)?invidious\.tube',
	328	r'(?:www\.)?invidiou\.site',
	329	r'(?:www\.)?invidious\.site',
	330	r'(?:www\.)?invidious\.xyz',
	331	r'(?:www\.)?invidious\.nixnet\.xyz',
	332	r'(?:www\.)?invidious\.048596\.xyz',
	333	r'(?:www\.)?invidious\.drycat\.fr',
	334	r'(?:www\.)?inv\.skyn3t\.in',
	335	r'(?:www\.)?tube\.poal\.co',
	336	r'(?:www\.)?tube\.connect\.cafe',
	337	r'(?:www\.)?vid\.wxzm\.sx',
	338	r'(?:www\.)?vid\.mint\.lgbt',
	339	r'(?:www\.)?vid\.puffyan\.us',
	340	r'(?:www\.)?yewtu\.be',
	341	r'(?:www\.)?yt\.elukerio\.org',
	342	r'(?:www\.)?yt\.lelux\.fi',
	343	r'(?:www\.)?invidious\.ggc-project\.de',
	344	r'(?:www\.)?yt\.maisputain\.ovh',
	345	r'(?:www\.)?ytprivate\.com',
	346	r'(?:www\.)?invidious\.13ad\.de',
	347	r'(?:www\.)?invidious\.toot\.koeln',
	348	r'(?:www\.)?invidious\.fdn\.fr',
	349	r'(?:www\.)?watch\.nettohikari\.com',
	350	r'(?:www\.)?invidious\.namazso\.eu',
	351	r'(?:www\.)?invidious\.silkky\.cloud',
	352	r'(?:www\.)?invidious\.exonip\.de',
	353	r'(?:www\.)?invidious\.riverside\.rocks',
	354	r'(?:www\.)?invidious\.blamefran\.net',
	355	r'(?:www\.)?invidious\.moomoo\.de',
	356	r'(?:www\.)?ytb\.trom\.tf',
	357	r'(?:www\.)?yt\.cyberhost\.uk',
	358	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	359	r'(?:www\.)?qklhadlycap4cnod\.onion',
	360	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	361	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	362	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	363	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	364	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	365	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	366	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	367	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	368	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	369	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	370	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	371	r'(?:www\.)?piped\.kavin\.rocks',
	372	r'(?:www\.)?piped\.silkky\.cloud',
	373	r'(?:www\.)?piped\.tokhmi\.xyz',
	374	r'(?:www\.)?piped\.moomoo\.me',
	375	r'(?:www\.)?il\.ax',
	376	r'(?:www\.)?piped\.syncpundit\.com',
	377	r'(?:www\.)?piped\.mha\.fi',
	378	r'(?:www\.)?piped\.mint\.lgbt',
	379	r'(?:www\.)?piped\.privacy\.com\.de',
	380	)
	381
	382	# extracted from account/account_menu ep
	383	# XXX: These are the supported YouTube UI and API languages,
	384	# which is slightly different from languages supported for translation in YouTube studio
	385	_SUPPORTED_LANG_CODES = [
	386	'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
	387	'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
	388	'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
	389	'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
	390	'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
	391	'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
	392	]
	393
	394	_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
	395
	396	@functools.cached_property
	397	def _preferred_lang(self):
	398	"""
	399	Returns a language code supported by YouTube for the user preferred language.
	400	Returns None if no preferred language set.
	401	"""
	402	preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
	403	if not preferred_lang:
	404	return
	405	if preferred_lang not in self._SUPPORTED_LANG_CODES:
	406	raise ExtractorError(
	407	f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
	408	expected=True)
	409	elif preferred_lang != 'en':
	410	self.report_warning(
	411	f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
	412	return preferred_lang
	413
	414	def _initialize_consent(self):
	415	cookies = self._get_cookies('https://www.youtube.com/')
	416	if cookies.get('__Secure-3PSID'):
	417	return
	418	consent_id = None
	419	consent = cookies.get('CONSENT')
	420	if consent:
	421	if 'YES' in consent.value:
	422	return
	423	consent_id = self._search_regex(
	424	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	425	if not consent_id:
	426	consent_id = random.randint(100, 999)
	427	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	428
	429	def _initialize_pref(self):
	430	cookies = self._get_cookies('https://www.youtube.com/')
	431	pref_cookie = cookies.get('PREF')
	432	pref = {}
	433	if pref_cookie:
	434	try:
	435	pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
	436	except ValueError:
	437	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	438	pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
	439	self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
	440
	441	def _real_initialize(self):
	442	self._initialize_pref()
	443	self._initialize_consent()
	444	self._check_login_required()
	445
	446	def _check_login_required(self):
	447	if self._LOGIN_REQUIRED and not self._cookies_passed:
	448	self.raise_login_required('Login details are needed to download this content', method='cookies')
	449
	450	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	451	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	452
	453	def _get_default_ytcfg(self, client='web'):
	454	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	455
	456	def _get_innertube_host(self, client='web'):
	457	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	458
	459	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	460	# try_get but with fallback to default ytcfg client values when present
	461	_func = lambda y: try_get(y, getter, expected_type)
	462	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	463
	464	def _extract_client_name(self, ytcfg, default_client='web'):
	465	return self._ytcfg_get_safe(
	466	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	467	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
	468
	469	def _extract_client_version(self, ytcfg, default_client='web'):
	470	return self._ytcfg_get_safe(
	471	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	472	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
	473
	474	def _select_api_hostname(self, req_api_hostname, default_client=None):
	475	return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
	476	or req_api_hostname or self._get_innertube_host(default_client or 'web'))
	477
	478	def _extract_api_key(self, ytcfg=None, default_client='web'):
	479	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
	480
	481	def _extract_context(self, ytcfg=None, default_client='web'):
	482	context = get_first(
	483	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	484	# Enforce language and tz for extraction
	485	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	486	client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	487	return context
	488
	489	_SAPISID = None
	490
	491	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	492	time_now = round(time.time())
	493	if self._SAPISID is None:
	494	yt_cookies = self._get_cookies('https://www.youtube.com')
	495	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	496	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	497	sapisid_cookie = dict_get(
	498	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	499	if sapisid_cookie and sapisid_cookie.value:
	500	self._SAPISID = sapisid_cookie.value

1

import base64

import calendar

import copy

import datetime

import enum

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

21

from .openload import PhantomJSwrapper

22

from ..compat import functools

23

from ..jsinterp import JSInterpreter

24

from ..utils import (

NO_DEFAULT,

ExtractorError,

LazyList,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

filter_dict,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

69

INNERTUBE_CLIENTS = {

70

'web': {

71

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

72

'INNERTUBE_CONTEXT': {

73

'client': {

74

'clientName': 'WEB',

75

'clientVersion': '2.20220801.00.00',

76

}

77

},

78

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

79

},

80

'web_embedded': {

81

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

82

'INNERTUBE_CONTEXT': {

83

'client': {

84

'clientName': 'WEB_EMBEDDED_PLAYER',

85

'clientVersion': '1.20220731.00.00',

86

},

87

},

88

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

89

},

90

'web_music': {

91

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

92

'INNERTUBE_HOST': 'music.youtube.com',

93

'INNERTUBE_CONTEXT': {

94

'client': {

95

'clientName': 'WEB_REMIX',

96

'clientVersion': '1.20220727.01.00',

97

}

98

},

99

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

100

},

101

'web_creator': {

102

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

103

'INNERTUBE_CONTEXT': {

104

'client': {

105

'clientName': 'WEB_CREATOR',

106

'clientVersion': '1.20220726.00.00',

107

}

108

},

109

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

110

},

111

'android': {

112

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

113

'INNERTUBE_CONTEXT': {

114

'client': {

115

'clientName': 'ANDROID',

116

'clientVersion': '17.31.35',

117

'androidSdkVersion': 30,

118

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

119

}

120

},

121

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

122

'REQUIRE_JS_PLAYER': False

123

},

124

'android_embedded': {

125

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

126

'INNERTUBE_CONTEXT': {

127

'client': {

128

'clientName': 'ANDROID_EMBEDDED_PLAYER',

129

'clientVersion': '17.31.35',

130

'androidSdkVersion': 30,

131

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

132

},

133

},

134

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

135

'REQUIRE_JS_PLAYER': False

136

},

137

'android_music': {

138

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

139

'INNERTUBE_CONTEXT': {

140

'client': {

141

'clientName': 'ANDROID_MUSIC',

142

'clientVersion': '5.16.51',

143

'androidSdkVersion': 30,

144

'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'

145

}

146

},

147

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

148

'REQUIRE_JS_PLAYER': False

149

},

150

'android_creator': {

151

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

152

'INNERTUBE_CONTEXT': {

153

'client': {

154

'clientName': 'ANDROID_CREATOR',

155

'clientVersion': '22.30.100',

156

'androidSdkVersion': 30,

157

'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'

158

},

159

},

160

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

161

'REQUIRE_JS_PLAYER': False

162

},

163

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

164

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

165

'ios': {

166

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

167

'INNERTUBE_CONTEXT': {

168

'client': {

169

'clientName': 'IOS',

170

'clientVersion': '17.33.2',

171

'deviceModel': 'iPhone14,3',

172

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

173

}

174

},

175

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

176

'REQUIRE_JS_PLAYER': False

177

},

178

'ios_embedded': {

179

'INNERTUBE_CONTEXT': {

180

'client': {

181

'clientName': 'IOS_MESSAGES_EXTENSION',

182

'clientVersion': '17.33.2',

183

'deviceModel': 'iPhone14,3',

184

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

185

},

186

},

187

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

188

'REQUIRE_JS_PLAYER': False

189

},

190

'ios_music': {

191

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

192

'INNERTUBE_CONTEXT': {

193

'client': {

194

'clientName': 'IOS_MUSIC',

195

'clientVersion': '5.21',

196

'deviceModel': 'iPhone14,3',

197

'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

198

},

199

},

200

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

201

'REQUIRE_JS_PLAYER': False

202

},

203

'ios_creator': {

204

'INNERTUBE_CONTEXT': {

205

'client': {

206

'clientName': 'IOS_CREATOR',

207

'clientVersion': '22.33.101',

208

'deviceModel': 'iPhone14,3',

209

'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

210

},

211

},

212

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

213

'REQUIRE_JS_PLAYER': False

214

},

215

# mweb has 'ultralow' formats

216

# See: https://github.com/yt-dlp/yt-dlp/pull/557

217

'mweb': {

218

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

219

'INNERTUBE_CONTEXT': {

220

'client': {

221

'clientName': 'MWEB',

222

'clientVersion': '2.20220801.00.00',

223

}

224

},

225

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

226

},

227

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

228

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

229

'tv_embedded': {

230

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

231

'INNERTUBE_CONTEXT': {

232

'client': {

233

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

234

'clientVersion': '2.0',

235

},

236

},

237

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

243

variant, *base = client_name.rsplit('.', 1)

244

if base:

245

return variant, base[0], variant

246

base, *variant = client_name.split('_', 1)

247

return client_name, base, variant[0] if variant else None

248

249

250

def build_innertube_clients():
    """Complete every entry of ``INNERTUBE_CLIENTS`` in place.

    For each client already in the table this fills in a default API key,
    host and ``REQUIRE_JS_PLAYER`` flag, defaults the context language to
    ``'en'`` and stores a ``priority``: ten times the value ``qualities``
    returns for the base client, minus a small penalty
    (2 for ``*_embedded`` clients, 3 for any other variant).
    Clients without a variant additionally get a ``<client>_embedscreen``
    deep copy that uses the EMBED client screen (priority penalty 3).
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    # NOTE(review): presumably ranks earlier-listed base clients higher -- confirm against utils.qualities
    rank = qualities(base_clients[::-1])
    fallbacks = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: the loop inserts new '<client>_embedscreen' keys
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for key, value in fallbacks:
            cfg.setdefault(key, value)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        cfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            clone = copy.deepcopy(cfg)
            clone['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            clone['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            clone['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = clone

276

277

278

build_innertube_clients()

279

280

281

@enum.unique
class BadgeType(enum.Enum):
    """Badge categories; member values are assigned automatically in declaration order."""

    # Availability-related badges
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Live-status badge
    LIVE_NOW = enum.auto()

288

289

290

class YoutubeBaseInfoExtractor(InfoExtractor):

291

"""Provide base functions for Youtube extractors"""

292

293

_RESERVED_NAMES = (

294

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

300

301

# _NETRC_MACHINE = 'youtube'

302

303

# If True it will raise an error if no login info is provided

304

_LOGIN_REQUIRED = False

305

306

_INVIDIOUS_SITES = (

307

# invidious-redirect websites

308

r'(?:www\.)?redirect\.invidious\.io',

309

r'(?:(?:www|dev)\.)?invidio\.us',

310

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

311

r'(?:www\.)?invidious\.pussthecat\.org',

312

r'(?:www\.)?invidious\.zee\.li',

313

r'(?:www\.)?invidious\.ethibox\.fr',

314

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

315

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

316

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

317

# youtube-dl invidious instances list

318

r'(?:(?:www|no)\.)?invidiou\.sh',

319

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

320

r'(?:www\.)?invidious\.kabi\.tk',

321

r'(?:www\.)?invidious\.mastodon\.host',

322

r'(?:www\.)?invidious\.zapashcanon\.fr',

323

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

324

r'(?:www\.)?invidious\.tinfoil-hat\.net',

325

r'(?:www\.)?invidious\.himiko\.cloud',

326

r'(?:www\.)?invidious\.reallyancient\.tech',

327

r'(?:www\.)?invidious\.tube',

328

r'(?:www\.)?invidiou\.site',

329

r'(?:www\.)?invidious\.site',

330

r'(?:www\.)?invidious\.xyz',

331

r'(?:www\.)?invidious\.nixnet\.xyz',

332

r'(?:www\.)?invidious\.048596\.xyz',

333

r'(?:www\.)?invidious\.drycat\.fr',

334

r'(?:www\.)?inv\.skyn3t\.in',

335

r'(?:www\.)?tube\.poal\.co',

336

r'(?:www\.)?tube\.connect\.cafe',

337

r'(?:www\.)?vid\.wxzm\.sx',

338

r'(?:www\.)?vid\.mint\.lgbt',

339

r'(?:www\.)?vid\.puffyan\.us',

340

r'(?:www\.)?yewtu\.be',

341

r'(?:www\.)?yt\.elukerio\.org',

342

r'(?:www\.)?yt\.lelux\.fi',

343

r'(?:www\.)?invidious\.ggc-project\.de',

344

r'(?:www\.)?yt\.maisputain\.ovh',

345

r'(?:www\.)?ytprivate\.com',

346

r'(?:www\.)?invidious\.13ad\.de',

347

r'(?:www\.)?invidious\.toot\.koeln',

348

r'(?:www\.)?invidious\.fdn\.fr',

349

r'(?:www\.)?watch\.nettohikari\.com',

350

r'(?:www\.)?invidious\.namazso\.eu',

351

r'(?:www\.)?invidious\.silkky\.cloud',

352

r'(?:www\.)?invidious\.exonip\.de',

353

r'(?:www\.)?invidious\.riverside\.rocks',

354

r'(?:www\.)?invidious\.blamefran\.net',

355

r'(?:www\.)?invidious\.moomoo\.de',

356

r'(?:www\.)?ytb\.trom\.tf',

357

r'(?:www\.)?yt\.cyberhost\.uk',

358

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

359

r'(?:www\.)?qklhadlycap4cnod\.onion',

360

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

361

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

362

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

363

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

364

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

365

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

366

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

367

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

368

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

369

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

370

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

371

r'(?:www\.)?piped\.kavin\.rocks',

372

r'(?:www\.)?piped\.silkky\.cloud',

373

r'(?:www\.)?piped\.tokhmi\.xyz',

374

r'(?:www\.)?piped\.moomoo\.me',

375

r'(?:www\.)?il\.ax',

376

r'(?:www\.)?piped\.syncpundit\.com',

377

r'(?:www\.)?piped\.mha\.fi',

378

r'(?:www\.)?piped\.mint\.lgbt',

379

r'(?:www\.)?piped\.privacy\.com\.de',

380

)

381

382

# extracted from account/account_menu ep

383

# XXX: These are the supported YouTube UI and API languages,

384

# which is slightly different from languages supported for translation in YouTube studio

385

_SUPPORTED_LANG_CODES = [

386

'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',

387

'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',

388

'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',

389

'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',

390

'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',

391

'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'

392

]

393

394

_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}

395

396

@functools.cached_property
def _preferred_lang(self):
    """
    Language code the user asked for via the ``lang`` extractor argument.

    Returns None when no language was requested. Raises ExtractorError when the
    requested code is not listed in ``_SUPPORTED_LANG_CODES``.
    """
    lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not lang:
        return None

    if lang not in self._SUPPORTED_LANG_CODES:
        raise ExtractorError(
            f'Unsupported language code: {lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
            expected=True)

    if lang != 'en':
        # Non-English responses are not what the extraction code is written against
        self.report_warning(
            f'Preferring "{lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return lang

413

414

def _initialize_consent(self):
    """Set an accepted ``CONSENT`` cookie unless a session or consent already exists."""
    yt_cookies = self._get_cookies('https://www.youtube.com/')
    if yt_cookies.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = yt_cookies.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            # Consent was already given; nothing to do
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    # Reuse the pending id when there is one, otherwise make one up
    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

428

429

def _initialize_pref(self):
    """Rewrite the ``PREF`` cookie so that ``hl`` and ``tz`` are forced."""
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')

    prefs = {}
    if existing:
        try:
            prefs = dict(urllib.parse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    # Keep whatever the user had, but override language and timezone
    prefs['hl'] = self._preferred_lang or 'en'
    prefs['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(prefs))

440

441

def _real_initialize(self):
    """Prepare cookies (PREF, then CONSENT) and then enforce the login requirement."""
    for step in (self._initialize_pref, self._initialize_consent, self._check_login_required):
        step()

445

446

def _check_login_required(self):
    """Raise a login-required error when this extractor needs cookies and none were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')

449

450

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

451

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

452

453

def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the built-in config for ``client``."""
    default_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(default_cfg)

455

456

def _get_innertube_host(self, client='web'):
    """Return the ``INNERTUBE_HOST`` entry of the built-in config for ``client``."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']

458

459

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get on ``ytcfg``, falling back to the default client config when that yields nothing truthy."""
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

463

464

def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from ``ytcfg``, or from the default client config."""
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, str, default_client)

468

469

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from ``ytcfg``, or from the default client config."""
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, str, default_client)

473

474

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname: the ``innertube_host`` extractor argument wins,
    then the requested hostname, then the host of the default client.
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

477

478

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return ``INNERTUBE_API_KEY`` from ``ytcfg``, or from the default client config."""
    def key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, key_getter, str, default_client)

480

481

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the ``INNERTUBE_CONTEXT`` dict with language and timezone forced."""
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section.update(
        hl=self._preferred_lang or 'en',
        timeZone='UTC',
        utcOffsetMinutes=0)
    return context

_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

492

time_now = round(time.time())

493

if self._SAPISID is None:

494

yt_cookies = self._get_cookies('https://www.youtube.com')

495

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

496

# See: https://github.com/yt-dlp/yt-dlp/issues/393

497

sapisid_cookie = dict_get(

498

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

499

if sapisid_cookie and sapisid_cookie.value:

500

self._SAPISID = sapisid_cookie.value

501

self.write_debug('Extracted SAPISID cookie')

502

# SAPISID cookie is required if not already present

503

if not yt_cookies.get('SAPISID'):

504

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

505

self._set_cookie(

506

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

507

else:

508

self._SAPISID = False

509

if not self._SAPISID:

510

return None

511

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

512

sapisidhash = hashlib.sha1(

513

f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()

514

return f'SAPISIDHASH {time_now}_{sapisidhash}'

515

516

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST ``query`` (merged with the innertube context) to ``/youtubei/v1/<ep>``
    and return the result of ``_download_json``.

    The ``innertube_key`` extractor argument takes precedence over ``api_key``,
    which takes precedence over the key of the default client.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})

533

534

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search ``webpage`` for the JSON assigned to ytInitialData and return it."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)

536

537

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first ``SESSION_INDEX`` that converts to an int, else None.
    """
    indices = (int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX'])) for cfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return ``ID_TOKEN`` from ``ytcfg`` if present, else search ``webpage`` for it."""
    cfg_token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if cfg_token:
        return cfg_token

    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

558

559

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated

        datasync_id = try_get(source, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
    return None

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_paths], expected_type=str)

587

588

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated (cached after first access)."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

591

592

def extract_ytcfg(self, video_id, webpage):
    """
    Extract the object passed to ``ytcfg.set(...)`` in ``webpage``.

    @param video_id: id passed on to ``_parse_json``
    @param webpage:  page HTML; may be empty/None
    @returns         the parsed dict, or {} when the webpage is empty,
                     nothing matched, or the JSON could not be parsed
    """
    if not webpage:
        return {}

    # The call's parentheses must be matched literally (\( and \)).
    # A bare `$` in their place is an end-of-string anchor, so the
    # pattern could never match `ytcfg.set({...});`.
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}

599

600

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an innertube API request.

    Explicit keyword arguments take precedence over values found in ``ytcfg``.
    Headers without a value are removed by ``filter_dict`` before returning.
    """
    hostname = self._select_api_hostname(api_hostname, default_client)
    origin = 'https://' + hostname

    headers = {
        'X-YouTube-Client-Name': str(
            self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With a sync id but no known session index, 0 is sent
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth_value = self._generate_sapisidhash_header(origin)
    if auth_value is not None:
        headers.update({'Authorization': auth_value, 'X-Origin': origin})

    return filter_dict(headers)

625

626

def _download_ytcfg(self, client, video_id):
    """Download the page for a web-based ``client`` and return its ytcfg ({} if unavailable)."""
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    page_url = config_pages.get(client)
    if not page_url:
        # Only the clients listed above have a page to fetch a config from
        return {}

    client_label = client.replace("_", " ").strip()
    webpage = self._download_webpage(
        page_url, video_id, fatal=False, note=f'Downloading {client_label} client config')
    return self.extract_ytcfg(video_id, webpage) or {}

637

638

@staticmethod

639

def _build_api_continuation_query(continuation, ctp=None):

640

query = {

641

'continuation': continuation

642

}

643

# TODO: Inconsistency with clickTrackingParams.

644

# Currently we have a fixed ctp contained within context (from ytcfg)

645

# and a ctp in root query for continuation.

646

if ctp:

647

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from next/reload continuation data in ``renderer``, if any."""
    continuation_data = try_get(
        renderer, (
            lambda x: x['continuations'][0]['nextContinuationData'],
            lambda x: x['continuation']['reloadContinuationData'],
        ), dict) or {}

    token = continuation_data.get('continuation')
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

662

663

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict; None if unusable."""
    if not isinstance(continuation_ep, dict):
        return None

    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

672

673

@classmethod
def _extract_continuation(cls, renderer):
    """
    Return a continuation query for ``renderer``.

    Next/reload continuation data is preferred; otherwise the first usable
    continuationItemRenderer endpoint under contents/items/rows is used.
    """
    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, endpoint_path, get_all=False, expected_type=cls._extract_continuation_ep_data)

683

684

@classmethod
def _extract_alerts(cls, data):
    """
    Yield ``(alert_type, message)`` for every alert in ``data['alerts']``.

    Entries that are not dicts, that have no ``type``, or whose text is empty
    are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Same defensive check as for the outer entry: calling .get()
            # on a non-dict value would raise AttributeError
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

696

697

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report ``(type, message)`` alerts as warnings and raise on the last error.

    Alerts of type "error" only count as errors when ``fatal`` is set.
    Messages listed in ``_IGNORED_WARNINGS`` are never reported as warnings.
    """
    fatal_alerts, other_alerts = [], []
    for kind, message in alerts:
        if fatal and kind.lower() == 'error':
            fatal_alerts.append([kind, message])
        elif message not in self._IGNORED_WARNINGS:
            other_alerts.append([kind, message])

    # Every error except the last one is only reported as a warning
    for kind, message in other_alerts + fatal_alerts[:-1]:
        self.report_warning(f'YouTube said: {kind} - {message}', only_once=only_once)

    if fatal_alerts:
        last_message = fatal_alerts[-1][1]
        raise ExtractorError('YouTube said: %s' % last_message, expected=expected)

709

710

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract alerts from ``data`` and pass them to ``_report_alerts``."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

712

713

def _extract_badges(self, renderer: dict):
    """
    Collect the badges of ``renderer`` as a list of ``{'type': BadgeType}`` dicts.

    Each ``metadataBadgeRenderer`` contributes at most one badge. The type is
    taken from the icon type or the badge style, falling back to matching the
    (English) label text.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Use the type matched by the label. `badge_type` is always
                # falsy on this path, so appending it recorded a useless badge.
                badges.append({'type': label_badge_type})
                # One badge per renderer: stop at the first matching label
                break

    return badges

@staticmethod
def _has_badge(badges, badge_type):
    """Return True if any entry of ``badges`` has the given ``type``."""
    def is_wanted(_, badge):
        return badge['type'] == badge_type

    return bool(traverse_obj(badges, is_wanted))

756

757

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of ``path_list`` in ``data``.

    A candidate's text is its ``simpleText``, or else the concatenation of the
    ``text`` of its ``runs`` (limited to the first ``max_runs`` when given).
    With no paths, ``data`` itself is the only candidate.
    Returns None when no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                # A non-branching path yields a single object, not a list of results
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                # The candidate may itself be the list of runs
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None

def _get_count(self, data, *path_list):
    """Parse a count from the text found at ``path_list``; None if it cannot be parsed."""
    raw_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(raw_text)
    if parsed is not None:
        return parsed

    # Fall back to the leading digits/commas once all whitespace is removed
    compact_text = re.sub(r'\s', '', raw_text)
    return str_to_int(self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns          list of dicts with 'url', 'height' and 'width'
    """
    results = []
    for base_path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(base_path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.split('?')[0]
            results.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    @returns the result of datetime_from_str, or None when the text has no
             relative time or the time cannot be converted
    """
    # `mobj` must be bound before it is inspected; without this search the
    # function fails with NameError. The named groups are the ones read below.
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None

    start = mobj.group('start')
    if start:
        return datetime_from_str(start)
    try:
        return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
    except ValueError:
        return None

def _parse_time_text(self, text):
    """
    Convert a time text into a timestamp.

    Tries a relative time first, then ``unified_timestamp`` on the whole text,
    then on a date-like part of the lowercased text. Returns None when ``text``
    is empty or nothing could be parsed; in the latter case a warning is
    reported if the preferred language is unset or English.
    """
    if not text:
        return None

    relative_dt = self.extract_relative_time(text)
    timestamp = (
        calendar.timegm(relative_dt.timetuple())
        if isinstance(relative_dt, datetime.datetime) else None)

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_part = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    # `text` is known to be non-empty here because of the early return above
    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp

846

847

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint ``ep`` with retries and return the response.

    Retries (through ``RetryManager``) on non-HTTP network errors, on HTTP
    errors other than 403/429, on "unknown error" alerts and on responses
    lacking ``check_get_keys``. Other failures go to ``_error_or_warning``
    with the given ``fatal``.
    """
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            cause = err.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(cause, urllib.error.HTTPError):
                retry.error = err
                continue

            head = cause.read(512)
            if not is_html(head):
                # A non-HTML error body may be JSON carrying an error message
                body = self._webpage_read_content(cause, None, item_id, prefix=head) or '{}'
                yt_error = try_get(
                    self._parse_json(body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)

            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.code in (403, 429):
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' not in err.msg.lower():
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if traverse_obj(response, *variadic(check_get_keys)):
            return response
        retry.error = ExtractorError('Incomplete data received', expected=True)

@staticmethod

def is_music_url(url):

901

return re.match(r'https?://music\.youtube\.com/', url) is not None

902

903

def _extract_video(self, renderer):
    """
    Build a ``url``-type result dict for a video ``renderer``.

    The view count is stored under ``concurrent_view_count`` for live and
    upcoming videos and under ``view_count`` otherwise. ``timestamp`` is only
    filled when the ``approximate_date`` extractor argument is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # Fall back to the duration contained in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText', 'shortViewCountText')
    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    time_text = self._get_text(renderer, 'publishedTimeText') or ''
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    timestamp = None
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        count_field: view_count,
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

967

# Human-readable description of this extractor.
IE_DESC = 'YouTube'
# Verbose-mode regex for a single-video URL (or a bare 11-character video ID).
# The "id" group captures the video ID; the "%(invidious)s" placeholders are
# filled with the alternation of YoutubeBaseInfoExtractor._INVIDIOUS_SITES.
# NOTE: whitespace and "# ..." text inside the literal are ignored by (?x), but
# no stray text may appear inside it - anything else becomes part of the pattern.
_VALID_URL = r"""(?x)^
    (
        (?:https?://|//) # http(s):// or protocol-independent URL
        (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
            (?:www\.)?deturl\.com/www\.youtube\.com|
            (?:www\.)?pwnyoutube\.com|
            (?:www\.)?hooktube\.com|
            (?:www\.)?yourepeat\.com|
            tube\.majestyc\.net|
            %(invidious)s|
            youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
        (?:.*?\#/)? # handle anchor (#/) redirect urls
        (?: # the various things that can precede the ID:
            (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
            |(?: # or the v= param in all its forms
                (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                (?:\?|\#!?) # the params delimiter ? or # or #!
                (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
                v=
            )
        ))
        |(?:
            youtu\.be| # just youtu.be/xxxx
            vid\.plus| # or vid.plus/xxxx
            zwearz\.com/watch| # or zwearz.com/watch/xxxx
            %(invidious)s
        )/
        |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
        )
    )? # all until now is optional -> you can pass the naked ID
    (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
    (?(1).+)? # if we found the ID, everything can follow
    (?:\#|$)""" % {
    'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}

# Patterns for locating embedded YouTube players inside arbitrary HTML.
# Every pattern exposes the embedded video URL through the named group "url".
_EMBED_REGEX = [
    # Player referenced from an <iframe>/<embed>/<object> tag, a data-video-url
    # attribute, or a SWFObject-style script call. Group 1 is the opening quote,
    # which must be repeated (\1) to close the URL.
    r'''(?x)
    (?:
        <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
        data-video-url=|
        <embed[^>]+?src=|
        embedSWF\(\s*|
        <object[^>]+data=|
        new\s+SWFObject\(
    )
    (["\'])
    (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
    \1''',
    # https://wordpress.org/plugins/lazy-load-for-videos/
    r'''(?xs)
    <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
    \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
]

# Regexes applied to a player JavaScript URL; each captures the player
# identifier in the named group "id".
_PLAYER_INFO_RE = (
    # .../s/player/<id>/player...
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    # .../<id>/player_ias.vflset[/<locale>]/base.js or the "plasma" phone/tablet variant
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    # any .js URL containing a "vfl..." token
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)

1028

# Static metadata keyed by format identifier string (plus the special
# '_rtmp' key). Each value is a dict of known properties for that format
# (container extension, dimensions, codecs, bitrate, note, preference).
# Fields that vary per video are intentionally left out of an entry.
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # Dash mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # Dash webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # Dash webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # Dash webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
    '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
    '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
    '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
    '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}

1137

# Names of the subtitle formats this extractor knows about.
# NOTE(review): presumably these are the format values requested for each
# caption track - confirm against the subtitle-extraction code.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# NOTE(review): looks like this turns off the base extractor's geo-bypass behaviour - confirm in InfoExtractor.
_GEO_BYPASS = False

# Name string for this extractor.
IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1149

'uploader': 'Philipp Hagemeister',

1150

'uploader_id': 'phihag',

1151

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1152

'channel': 'Philipp Hagemeister',

1153

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1154

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1155

'upload_date': '20121002',

1156

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1157

'categories': ['Science & Technology'],

1158

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1163

'playable_in_embed': True,

1164

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1165

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1170

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1175

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1180

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1181

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1182

'uploader': 'SET India',

1183

'uploader_id': 'setindia',

1184

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1185

'age_limit': 18,

1186

},

1187

'skip': 'Private video',

1188

},

1189

{

1190

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1191

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1196

'uploader': 'Philipp Hagemeister',

1197

'uploader_id': 'phihag',

1198

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1199

'channel': 'Philipp Hagemeister',

1200

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1201

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1202

'upload_date': '20121002',

1203

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1204

'categories': ['Science & Technology'],

1205

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1210

'playable_in_embed': True,

1211

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1212

'live_status': 'not_live',

1213

'age_limit': 0,

1214

'comment_count': int,

1215

'channel_follower_count': int

1216

},

1217

'params': {

1218

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1223

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1228

'uploader_id': '8KVIDEO',

1229

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1230

'description': '',

1231

'uploader': '8KVIDEO',

1232

'title': 'UHDTV TEST 8K VIDEO.mp4'

1233

},

1234

'params': {

1235

'youtube_include_dash_manifest': True,

1236

'format': '141',

1237

},

1238

'skip': 'format 141 not served anymore',

1239

},

1240

# DASH manifest with encrypted signature

1241

{

1242

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1247

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1248

'duration': 244,

1249

'uploader': 'AfrojackVEVO',

1250

'uploader_id': 'AfrojackVEVO',

1251

'upload_date': '20131011',

1252

'abr': 129.495,

1253

'like_count': int,

1254

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1255

'playable_in_embed': True,

1256

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1257

'view_count': int,

1258

'track': 'The Spark',

1259

'live_status': 'not_live',

1260

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1261

'channel': 'Afrojack',

1262

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1263

'tags': 'count:19',

1264

'availability': 'public',

1265

'categories': ['Music'],

1266

'age_limit': 0,

1267

'alt_title': 'The Spark',

1268

'channel_follower_count': int

1269

},

1270

'params': {

1271

'youtube_include_dash_manifest': True,

1272

'format': '141/bestaudio[ext=m4a]',

1273

},

1274

},

1275

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1276

{

1277

'note': 'Embed allowed age-gate video',

1278

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1283

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1284

'duration': 142,

1285

'uploader': 'The Witcher',

1286

'uploader_id': 'WitcherGame',

1287

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1288

'upload_date': '20140605',

1289

'age_limit': 18,

1290

'categories': ['Gaming'],

1291

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1292

'availability': 'needs_auth',

1293

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1294

'like_count': int,

1295

'channel': 'The Witcher',

1296

'live_status': 'not_live',

1297

'tags': 'count:17',

1298

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1299

'playable_in_embed': True,

1300

'view_count': int,

1301

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1306

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1311

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1312

'upload_date': '20200408',

1313

'uploader_id': 'FlyingKitty900',

1314

'uploader': 'FlyingKitty',

1315

'age_limit': 18,

1316

'availability': 'needs_auth',

1317

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1318

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1319

'channel': 'FlyingKitty',

1320

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1321

'view_count': int,

1322

'categories': ['Entertainment'],

1323

'live_status': 'not_live',

1324

'tags': ['Flyingkitty', 'godzilla 2'],

1325

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1326

'like_count': int,

1327

'duration': 177,

1328

'playable_in_embed': True,

1329

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1334

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1335

'info_dict': {

1336

'id': 'Tq92D6wQ1mg',

1337

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1338

'ext': 'mp4',

1339

'upload_date': '20191228',

1340

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1341

'uploader': 'Projekt Melody',

1342

'description': 'md5:17eccca93a786d51bc67646756894066',

1343

'age_limit': 18,

1344

'like_count': int,

1345

'availability': 'needs_auth',

1346

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1347

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1348

'view_count': int,

1349

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1350

'channel': 'Projekt Melody',

1351

'live_status': 'not_live',

1352

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1353

'playable_in_embed': True,

1354

'categories': ['Entertainment'],

1355

'duration': 106,

1356

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1357

'comment_count': int,

1358

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1363

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1368

'uploader': 'Herr Lurik',

1369

'uploader_id': 'st3in234',

1370

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1371

'upload_date': '20130730',

1372

'track': 'Such mich find mich',

1373

'age_limit': 0,

1374

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1375

'like_count': int,

1376

'playable_in_embed': False,

1377

'creator': 'OOMPH!',

1378

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1379

'view_count': int,

1380

'alt_title': 'Such mich find mich',

1381

'duration': 210,

1382

'channel': 'Herr Lurik',

1383

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1384

'categories': ['Music'],

1385

'availability': 'public',

1386

'uploader_url': 'http://www.youtube.com/user/st3in234',

1387

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1388

'live_status': 'not_live',

1389

'artist': 'OOMPH!',

1390

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1395

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1396

'only_matching': True,

1397

},

1398

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1399

# YouTube Red ad is not captured for creator

1400

{

1401

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1407

'uploader_id': 'deadmau5',

1408

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1409

'creator': 'deadmau5',

1410

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1411

'uploader': 'deadmau5',

1412

'title': 'Deadmau5 - Some Chords (HD)',

1413

'alt_title': 'Some Chords',

1414

'availability': 'public',

1415

'tags': 'count:14',

1416

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1417

'view_count': int,

1418

'live_status': 'not_live',

1419

'channel': 'deadmau5',

1420

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1421

'like_count': int,

1422

'track': 'Some Chords',

1423

'artist': 'deadmau5',

1424

'playable_in_embed': True,

1425

'age_limit': 0,

1426

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1427

'categories': ['Music'],

1428

'album': 'Some Chords',

1429

'channel_follower_count': int

1430

},

1431

'expected_warnings': [

1432

'DASH manifest missing',

1433

]

1434

},

1435

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1436

{

1437

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1443

'uploader_id': 'olympic',

1444

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1445

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1446

'uploader': 'Olympics',

1447

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1448

'like_count': int,

1449

'release_timestamp': 1343767800,

1450

'playable_in_embed': True,

1451

'categories': ['Sports'],

1452

'release_date': '20120731',

1453

'channel': 'Olympics',

1454

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1455

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1456

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1457

'age_limit': 0,

1458

'availability': 'public',

1459

'live_status': 'was_live',

1460

'view_count': int,

1461

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1462

'channel_follower_count': int

1463

},

1464

'params': {

1465

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1475

'duration': 85,

1476

'upload_date': '20110310',

1477

'uploader_id': 'AllenMeow',

1478

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1479

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1480

'uploader': '孫ᄋᄅ',

1481

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1482

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1487

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1488

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1489

'view_count': int,

1490

'categories': ['People & Blogs'],

1491

'like_count': int,

1492

'live_status': 'not_live',

1493

'availability': 'unlisted',

1494

'comment_count': int,

1495

'channel_follower_count': int

1496

},

1497

},

1498

# url_encoded_fmt_stream_map is empty string

1499

{

1500

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1505

'description': '',

1506

'upload_date': '20150404',

1507

'uploader_id': 'spbelect',

1508

'uploader': 'Наблюдатели Петербурга',

1509

},

1510

'params': {

1511

'skip_download': 'requires avconv',

1512

},

1513

'skip': 'This live event has ended.',

1514

},

1515

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1516

{

1517

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1522

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1523

'duration': 220,

1524

'upload_date': '20150625',

1525

'uploader_id': 'dorappi2000',

1526

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1527

'uploader': 'dorappi2000',

1528

'formats': 'mincount:31',

1529

},

1530

'skip': 'not actual anymore',

1531

},

1532

# DASH manifest with segment_list

1533

{

1534

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1535

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1540

'uploader': 'Airtek',

1541

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1542

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1543

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1544

},

1545

'params': {

1546

'youtube_include_dash_manifest': True,

1547

'format': '135', # bestvideo

1548

},

1549

'skip': 'This live event has ended.',

1550

},

1551

{

1552

# Multifeed videos (multiple cameras), URL is for Main Camera

1553

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1554

'info_dict': {

1555

'id': 'jvGDaLqkpTg',

1556

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1557

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1564

'description': 'md5:e03b909557865076822aa169218d6a5d',

1565

'duration': 10643,

1566

'upload_date': '20161111',

1567

'uploader': 'Team PGP',

1568

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1569

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1576

'description': 'md5:e03b909557865076822aa169218d6a5d',

1577

'duration': 10991,

1578

'upload_date': '20161111',

1579

'uploader': 'Team PGP',

1580

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1581

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1588

'description': 'md5:e03b909557865076822aa169218d6a5d',

1589

'duration': 10995,

1590

'upload_date': '20161111',

1591

'uploader': 'Team PGP',

1592

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1593

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1600

'description': 'md5:e03b909557865076822aa169218d6a5d',

1601

'duration': 10990,

1602

'upload_date': '20161111',

1603

'uploader': 'Team PGP',

1604

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1605

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1610

},

1611

'skip': 'Not multifeed anymore',

1612

},

1613

{

1614

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1615

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1616

'info_dict': {

1617

'id': 'gVfLd0zydlo',

1618

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1619

},

1620

'playlist_count': 2,

1621

'skip': 'Not multifeed anymore',

1622

},

1623

{

1624

'url': 'https://vid.plus/FlRa-iH7PGw',

1625

'only_matching': True,

1626

},

1627

{

1628

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1629

'only_matching': True,

1630

},

1631

{

1632

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1633

# Also tests cut-off URL expansion in video description (see

1634

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1635

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1636

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1641

'alt_title': 'Dark Walk',

1642

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1643

'duration': 133,

1644

'upload_date': '20151119',

1645

'uploader_id': 'IronSoulElf',

1646

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1647

'uploader': 'IronSoulElf',

1648

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1649

'track': 'Dark Walk',

1650

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1651

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1652

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1653

'categories': ['Film & Animation'],

1654

'view_count': int,

1655

'live_status': 'not_live',

1656

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1657

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1658

'tags': 'count:13',

1659

'availability': 'public',

1660

'channel': 'IronSoulElf',

1661

'playable_in_embed': True,

1662

'like_count': int,

1663

'age_limit': 0,

1664

'channel_follower_count': int

1665

},

1666

'params': {

1667

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1672

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1673

'only_matching': True,

1674

},

1675

{

1676

# Video with yt:stretch=17:0

1677

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1682

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1683

'upload_date': '20151107',

1684

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1685

'uploader': 'CH GAMER DROID',

1686

},

1687

'params': {

1688

'skip_download': True,

1689

},

1690

'skip': 'This video does not exist.',

1691

},

1692

{

1693

# Video with incomplete 'yt:stretch=16:'

1694

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1695

'only_matching': True,

1696

},

1697

{

1698

# Video licensed under Creative Commons

1699

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1704

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1705

'duration': 721,

1706

'upload_date': '20150128',

1707

'uploader_id': 'BerkmanCenter',

1708

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1709

'uploader': 'The Berkman Klein Center for Internet & Society',

1710

'license': 'Creative Commons Attribution license (reuse allowed)',

1711

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1712

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1713

'like_count': int,

1714

'age_limit': 0,

1715

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1716

'channel': 'The Berkman Klein Center for Internet & Society',

1717

'availability': 'public',

1718

'view_count': int,

1719

'categories': ['Education'],

1720

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1721

'live_status': 'not_live',

1722

'playable_in_embed': True,

1723

'comment_count': int,

1724

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

# Channel-like uploader_url

1733

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1738

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1739

'duration': 4060,

1740

'upload_date': '20151120',

1741

'uploader': 'Bernie Sanders',

1742

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1743

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1744

'license': 'Creative Commons Attribution license (reuse allowed)',

1745

'playable_in_embed': True,

1746

'tags': 'count:12',

1747

'like_count': int,

1748

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1749

'age_limit': 0,

1750

'availability': 'public',

1751

'categories': ['News & Politics'],

1752

'channel': 'Bernie Sanders',

1753

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1754

'view_count': int,

1755

'live_status': 'not_live',

1756

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1757

'comment_count': int,

1758

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1767

'only_matching': True,

1768

},

1769

{

1770

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1771

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1772

'only_matching': True,

1773

},

1774

{

1775

# Rental video preview

1776

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1781

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1782

'upload_date': '20150811',

1783

'uploader': 'FlixMatrix',

1784

'uploader_id': 'FlixMatrixKaravan',

1785

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1786

'license': 'Standard YouTube License',

1787

},

1788

'params': {

1789

'skip_download': True,

1790

},

1791

'skip': 'This video is not available.',

1792

},

1793

{

1794

# YouTube Red video with episode data

1795

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1800

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1801

'duration': 2085,

1802

'upload_date': '20170118',

1803

'uploader': 'Vsauce',

1804

'uploader_id': 'Vsauce',

1805

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1806

'series': 'Mind Field',

1807

'season_number': 1,

1808

'episode_number': 1,

1809

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1810

'tags': 'count:12',

1811

'view_count': int,

1812

'availability': 'public',

1813

'age_limit': 0,

1814

'channel': 'Vsauce',

1815

'episode': 'Episode 1',

1816

'categories': ['Entertainment'],

1817

'season': 'Season 1',

1818

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1819

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1820

'like_count': int,

1821

'playable_in_embed': True,

1822

'live_status': 'not_live',

1823

'channel_follower_count': int

1824

},

1825

'params': {

1826

'skip_download': True,

1827

},

1828

'expected_warnings': [

1829

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1834

# as inappropriate or offensive to some audiences.

1835

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1840

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1841

'duration': 965,

1842

'upload_date': '20140124',

1843

'uploader': 'New Century Foundation',

1844

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1845

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1846

},

1847

'params': {

1848

'skip_download': True,

1849

},

1850

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1855

'only_matching': True,

1856

},

1857

{

1858

# geo restricted to JP

1859

'url': 'sJL6WA-aGkQ',

1860

'only_matching': True,

1861

},

1862

{

1863

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1864

'only_matching': True,

1865

},

1866

{

1867

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1868

'only_matching': True,

1869

},

1870

{

1871

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1872

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1873

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1878

'only_matching': True,

1879

},

1880

{

1881

# Video with unsupported adaptive stream type formats

1882

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1887

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1888

'duration': 433,

1889

'upload_date': '20130923',

1890

'uploader': 'Amelia Putri Harwita',

1891

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1892

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1893

'formats': 'maxcount:10',

1894

},

1895

'params': {

1896

'skip_download': True,

1897

'youtube_include_dash_manifest': False,

1898

},

1899

'skip': 'not actual anymore',

1900

},

1901

{

1902

# YouTube Music Auto-generated description

1903

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1908

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1909

'upload_date': '20190312',

1910

'uploader': 'Stephen - Topic',

1911

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1912

'artist': 'Stephen',

1913

'track': 'Voyeur Girl',

1914

'album': 'it\'s too much love to know my dear',

1915

'release_date': '20190313',

1916

'release_year': 2019,

1917

'alt_title': 'Voyeur Girl',

1918

'view_count': int,

1919

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1920

'playable_in_embed': True,

1921

'like_count': int,

1922

'categories': ['Music'],

1923

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1924

'channel': 'Stephen',

1925

'availability': 'public',

1926

'creator': 'Stephen',

1927

'duration': 169,

1928

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1929

'age_limit': 0,

1930

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1931

'tags': 'count:11',

1932

'live_status': 'not_live',

1933

'channel_follower_count': int

1934

},

1935

'params': {

1936

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1941

'only_matching': True,

1942

},

1943

{

1944

# invalid -> valid video id redirection

1945

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1950

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1951

'upload_date': '20090125',

1952

'uploader': 'Prochorowka',

1953

'uploader_id': 'Prochorowka',

1954

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1955

'artist': 'Panjabi MC',

1956

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1957

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1958

},

1959

'params': {

1960

'skip_download': True,

1961

},

1962

'skip': 'Video unavailable',

1963

},

1964

{

1965

# empty description results in an empty string

1966

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1973

'uploader_id': 'ElevageOrVert',

1974

'uploader': 'ElevageOrVert',

1975

'view_count': int,

1976

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1977

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1978

'like_count': int,

1979

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1980

'tags': [],

1981

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1982

'availability': 'public',

1983

'age_limit': 0,

1984

'categories': ['Pets & Animals'],

1985

'duration': 7,

1986

'playable_in_embed': True,

1987

'live_status': 'not_live',

1988

'channel': 'ElevageOrVert',

1989

'channel_follower_count': int

1990

},

1991

'params': {

1992

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1997

# see [2] for an example with '};' inside ytInitialPlayerResponse

1998

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1999

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

2000

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

2005

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

2006

'upload_date': '20130831',

2007

'uploader_id': 'kudvenkat',

2008

'uploader': 'kudvenkat',

2009

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

2010

'like_count': int,

2011

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

2012

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

2013

'live_status': 'not_live',

2014

'categories': ['Education'],

2015

'availability': 'public',

2016

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

2017

'tags': 'count:12',

2018

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

2023

'comment_count': int,

2024

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

2033

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

2034

'only_matching': True,

2035

},

2036

{

2037

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

2038

'only_matching': True,

2039

},

2040

{

2041

# https://github.com/ytdl-org/youtube-dl/pull/28094

2042

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

2048

'upload_date': '20141120',

2049

'uploader': 'The Cinematic Orchestra - Topic',

2050

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2051

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2052

'artist': 'The Cinematic Orchestra',

2053

'track': 'Burn Out',

2054

'album': 'Every Day',

2055

'like_count': int,

2056

'live_status': 'not_live',

2057

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2062

'creator': 'The Cinematic Orchestra',

2063

'channel': 'The Cinematic Orchestra',

2064

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

2065

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2066

'availability': 'public',

2067

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

2068

'categories': ['Music'],

2069

'playable_in_embed': True,

2070

'channel_follower_count': int

2071

},

2072

'params': {

2073

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

2078

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

2079

'only_matching': True,

2080

},

2081

{

2082

# controversial video, requires bpctr/contentCheckOk

2083

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

2088

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

2089

'uploader': 'CBS Mornings',

2090

'uploader_id': 'CBSThisMorning',

2091

'upload_date': '20140716',

2092

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

2093

'duration': 170,

2094

'categories': ['News & Politics'],

2095

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2096

'view_count': int,

2097

'channel': 'CBS Mornings',

2098

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2099

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2100

'age_limit': 18,

2101

'availability': 'needs_auth',

2102

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2103

'like_count': int,

2104

'live_status': 'not_live',

2105

'playable_in_embed': True,

2106

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2111

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2116

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2117

'upload_date': '20201120',

2118

'uploader': 'Walk around Japan',

2119

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2120

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2121

'duration': 1456,

2122

'categories': ['Travel & Events'],

2123

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2124

'view_count': int,

2125

'channel': 'Walk around Japan',

2126

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2127

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2128

'age_limit': 0,

2129

'availability': 'public',

2130

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2131

'live_status': 'not_live',

2132

'playable_in_embed': True,

2133

'channel_follower_count': int

2134

},

2135

'params': {

2136

'skip_download': True,

2137

},

2138

}, {

2139

# Has multiple audio streams

2140

'url': 'WaOKSUlf4TM',

2141

'only_matching': True

2142

}, {

2143

# Requires Premium: has format 141 when requested using YTM url

2144

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2145

'only_matching': True

2146

}, {

2147

# multiple subtitles with same lang_code

2148

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2149

'only_matching': True,

2150

}, {

2151

# Force use android client fallback

2152

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2153

'info_dict': {

2154

'id': 'YOelRv7fMxY',

2155

'title': 'DIGGING A SECRET TUNNEL Part 1',

2156

'ext': '3gp',

2157

'upload_date': '20210624',

2158

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2159

'uploader': 'colinfurze',

2160

'uploader_id': 'colinfurze',

2161

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2162

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2163

'duration': 596,

2164

'categories': ['Entertainment'],

2165

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2166

'view_count': int,

2167

'channel': 'colinfurze',

2168

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2169

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2170

'age_limit': 0,

2171

'availability': 'public',

2172

'like_count': int,

2173

'live_status': 'not_live',

2174

'playable_in_embed': True,

2175

'channel_follower_count': int,

'chapters': list,

},

'params': {

'format': '17', # 3gp format available on android

2180

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2185

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2186

'only_matching': True,

2187

'params': {

2188

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2193

'only_matching': True,

2194

}, {

2195

'note': 'Storyboards',

2196

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2202

'uploader_id': 'scishow',

2203

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2204

'upload_date': '20140324',

2205

'uploader': 'SciShow',

2206

'like_count': int,

2207

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2208

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2209

'view_count': int,

2210

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2211

'playable_in_embed': True,

2212

'tags': 'count:12',

2213

'uploader_url': 'http://www.youtube.com/user/scishow',

2214

'availability': 'public',

2215

'channel': 'SciShow',

2216

'live_status': 'not_live',

2217

'duration': 248,

2218

'categories': ['Education'],

2219

'age_limit': 0,

2220

'channel_follower_count': int,

2221

'chapters': list,

2222

}, 'params': {'format': 'mhtml', 'skip_download': True}

2223

}, {

2224

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2225

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2230

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2231

'uploader': 'Leon Nguyen',

2232

'uploader_id': 'VNSXIII',

2233

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2234

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2235

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2240

'tags': 'count:23',

2241

'playable_in_embed': True,

2242

'live_status': 'not_live',

2243

'upload_date': '20220103',

2244

'like_count': int,

2245

'availability': 'public',

2246

'channel': 'Leon Nguyen',

2247

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2248

'comment_count': int,

2249

'channel_follower_count': int

2250

}

2251

}, {

2252

# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date

2253

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2258

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2259

'uploader': 'Leon Nguyen',

2260

'uploader_id': 'VNSXIII',

2261

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2262

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2263

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2268

'tags': 'count:23',

2269

'playable_in_embed': True,

2270

'live_status': 'not_live',

2271

'upload_date': '20220102',

2272

'like_count': int,

2273

'availability': 'public',

2274

'channel': 'Leon Nguyen',

2275

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2276

'comment_count': int,

2277

'channel_follower_count': int

2278

},

2279

'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}

2280

}, {

2281

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2282

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2287

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2288

'uploader': 'Quackity',

2289

'uploader_id': 'QuackityHQ',

2290

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2291

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2292

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2297

'tags': 'count:26',

2298

'playable_in_embed': True,

2299

'live_status': 'not_live',

2300

'release_timestamp': 1641172509,

2301

'release_date': '20220103',

2302

'upload_date': '20220103',

2303

'like_count': int,

2304

'availability': 'public',

2305

'channel': 'Quackity',

2306

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2307

'channel_follower_count': int

2308

}

2309

},

2310

{ # continuous livestream. Microformat upload date should be preferred.

2311

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2312

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2313

'info_dict': {

2314

'id': 'kgx4WGK0oNU',

2315

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2316

'ext': 'mp4',

2317

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2318

'availability': 'public',

2319

'age_limit': 0,

2320

'release_timestamp': 1637975704,

2321

'upload_date': '20210619',

2322

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2323

'live_status': 'is_live',

2324

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2325

'uploader': '阿鲍Abao',

2326

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2327

'channel': 'Abao in Tokyo',

2328

'channel_follower_count': int,

2329

'release_date': '20211127',

2330

'tags': 'count:39',

2331

'categories': ['People & Blogs'],

2332

'like_count': int,

2333

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2334

'view_count': int,

2335

'playable_in_embed': True,

2336

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2337

'concurrent_view_count': int,

2338

},

2339

'params': {'skip_download': True}

2340

}, {

2341

# Story. Requires specific player params to work.

2342

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2347

'view_count': int,

2348

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2349

'upload_date': '20220526',

2350

'categories': ['Education'],

2351

'title': 'Story',

2352

'channel': 'IT\'S HISTORY',

2353

'description': '',

2354

'uploader_id': 'BlastfromthePast',

2355

'duration': 12,

2356

'uploader': 'IT\'S HISTORY',

2357

'playable_in_embed': True,

2358

'age_limit': 0,

2359

'live_status': 'not_live',

2360

'tags': [],

2361

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2362

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2363

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2364

},

2365

'skip': 'stories get removed after some period of time',

2366

}, {

2367

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2372

'upload_date': '20220323',

2373

'like_count': int,

2374

'availability': 'unlisted',

2375

'channel': 'nao20010128nao',

2376

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2377

'age_limit': 0,

2378

'uploader': 'nao20010128nao',

2379

'uploader_id': 'nao20010128nao',

2380

'categories': ['Music'],

2381

'view_count': int,

2382

'description': '',

2383

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2384

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2385

'live_status': 'not_live',

2386

'playable_in_embed': True,

2387

'channel_follower_count': int,

2388

'duration': 6,

2389

'tags': [],

2390

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

2391

}

2392

}, {

2393

# Prefer primary title+description language metadata by default

2394

# Do not prefer translated description if primary is empty

2395

'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',

'info_dict': {

'id': 'el3E4MbxRqQ',

'ext': 'mp4',

'title': 'dlp test video 2 - primary sv no desc',

2400

'description': '',

2401

'channel': 'cole-dlp-test-acc',

2402

'tags': [],

2403

'view_count': int,

2404

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2405

'like_count': int,

2406

'playable_in_embed': True,

2407

'availability': 'unlisted',

2408

'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',

2409

'age_limit': 0,

2410

'duration': 5,

2411

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2412

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2413

'live_status': 'not_live',

2414

'upload_date': '20220908',

2415

'categories': ['People & Blogs'],

2416

'uploader': 'cole-dlp-test-acc',

2417

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2418

},

2419

'params': {'skip_download': True}

2420

}, {

2421

# Extractor argument: prefer translated title+description

2422

'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',

'info_dict': {

'id': 'gHKT4uU8Zng',

'ext': 'mp4',

'channel': 'cole-dlp-test-acc',

2427

'tags': [],

2428

'duration': 5,

2429

'live_status': 'not_live',

2430

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2431

'upload_date': '20220728',

2432

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2433

'view_count': int,

2434

'categories': ['People & Blogs'],

2435

'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',

2436

'title': 'dlp test video title translated (fr)',

2437

'availability': 'public',

2438

'uploader': 'cole-dlp-test-acc',

2439

'age_limit': 0,

2440

'description': 'dlp test video description translated (fr)',

2441

'playable_in_embed': True,

2442

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2443

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2444

},

2445

'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},

2446

'expected_warnings': [r'Preferring "fr" translated fields'],

2447

}, {

2448

'note': '6 channel audio',

2449

'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',

2450

'only_matching': True,

}

]

# Fixtures whose 'url' is a non-YouTube web page; the expected 'info_dict'
# describes the YouTube video that page embeds.
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'uploader': 'Christopher Sykes',
            'uploader_id': 'ChristopherJSykes',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        },
        'params': {
            'skip_download': True,
        }
    },
]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` value is always
    rejected; any other URL is judged by the parent class.
    """
    # NOTE(review): parse_qs is deliberately imported at function scope here;
    # presumably so the method stays self-contained - confirm before hoisting.
    from ..utils import parse_qs

    playlist_id = parse_qs(url).get('list', [None])[0]
    return False if playlist_id else super().suitable(url)

2498

2499

def __init__(self, *args, **kwargs):
    """Run the parent initialiser, then start with empty ``_code_cache`` and ``_player_cache`` dicts."""
    super().__init__(*args, **kwargs)
    # Two independent dict objects, one per cache attribute.
    self._code_cache, self._player_cache = {}, {}

2503

2504

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach fragment sources to every format flagged ``is_from_start``.

    Only the format dicts with a truthy ``is_from_start`` are touched, and they
    are modified in place:

    * ``is_live`` is set to the current live state;
    * while live, ``fragments`` becomes a callable bound to
      ``_live_dash_fragments`` and ``protocol`` is set to
      ``http_dash_segments_generator``;
    * otherwise ``fragments`` becomes a ``LazyList`` over that generator
      (called with an empty context) and ``is_from_start`` is removed.

    Returns nothing.
    """
    manifest_lock = threading.Lock()
    last_refresh = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Rebuild the shared format list from fresh player responses, but only
        # once more than `delay` seconds have elapsed since the last refresh.
        nonlocal formats, last_refresh, is_live
        if time.time() <= last_refresh + delay:
            return

        _, _, player_responses, player_url = self._download_player_responses(
            url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, live_status, _, formats, _ = self._list_formats(
            video_id, microformats, video_details, player_responses, player_url)
        is_live = live_status == 'is_live'
        last_refresh = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        # Only the refresh itself is serialised; the lookup below runs unlocked.
        with manifest_lock:
            refetch_manifest(format_id, delay)

        match = next((fmt for fmt in formats if fmt['format_id'] == format_id), None)
        if match:
            return match['manifest_url'], match['manifest_stream_number'], is_live

        if is_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt['is_live'] = is_live
        # When not live, a snapshot of the format (taken before 'fragments' is
        # set) is handed over as `manifestless_orig_fmt`; when live that
        # argument is simply False.
        fragment_gen = functools.partial(
            self._live_dash_fragments, video_id, fmt['format_id'],
            live_start_time, mpd_feed, not is_live and fmt.copy())
        if not is_live:
            fmt['fragments'] = LazyList(fragment_gen({}))
            del fmt['is_from_start']
            continue
        fmt['fragments'] = fragment_gen
        fmt['protocol'] = 'http_dash_segments_generator'

2551

2552

def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """
    Generate fragment dicts for a live (or post-live manifestless) DASH format

    @param video_id               Only used in the debug message
    @param format_id              Passed through to mpd_feed
    @param live_start_time        Start time of the stream in epoch seconds (falsy if unknown)
    @param mpd_feed               Callable (format_id, delay) that returns
                                  (manifest_url, manifest_stream_number, is_live) or None
    @param manifestless_orig_fmt  Format dict supplying 'fragments' and 'fragment_base_url';
                                  when truthy no MPD is downloaded and only one pass is made
    @param ctx                    Mutable dict; 'start' is read and 'last_error' is popped
    @returns                      Generator of {'url': ..., 'fragment_count': ...} dicts
    """
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # The nested helper returns this value on its failure paths, so it must be
    # bound before the first call; otherwise a failed first MPD fetch raises NameError
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """
        Refresh the manifest info through mpd_feed

        @returns (should_continue, last sequence number)
        """
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or (
            last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403)
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 2
                return False, last_seq
            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        # Give up once too many consecutive passes produced nothing new
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Caught right below; used to leave the for loop and restart the while loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        # Wait out the remainder of FETCH_SPAN seconds before polling again
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2662

2663

def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the absolute player JS URL found in the given ytcfgs, or None

    'PLAYER_JS_URL' is looked up first, then 'jsUrl' under 'WEB_PLAYER_CONTEXT_CONFIGS'.
    The webpage argument is accepted but not used by this method.
    """
    js_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', js_url) if js_url else None

2670

2671

def _download_player_url(self, video_id, fatal=False):
    """
    Build the player JS URL from the player version found in the iframe API script

    Returns None when the script could not be downloaded or no version was found.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'

2680

2681

def _signature_cache_id(self, example_sig):

2682

""" Return a string representation of a signature """

2683

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2684

2685

@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the 'id' group of the first pattern in _PLAYER_INFO_RE that matches player_url

    Raises ExtractorError when none of the patterns match.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)

2694

2695

def _load_player(self, video_id, player_url, fatal=True):
    """
    Return the player JS code for player_url, downloading it on the first request

    Downloaded code is kept in self._code_cache keyed by player id.
    Returns None if the code is not cached and the download yielded nothing.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache.get(player_id)

    js_code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if js_code:
        self._code_cache[player_id] = js_code
    return self._code_cache.get(player_id)

2705

2706

def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a function that reorders a signature like the player's JS does

    The reordering is stored as a list of source indices in the
    'youtube-sigfuncs' cache section, keyed by player id and signature layout.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    index_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not index_spec:
        player_code = self._load_player(video_id, player_url)
        if player_code:
            js_sig_func = self._parse_sig_js(player_code)
            # Feed a string whose characters are their own positions,
            # so the output directly encodes the permutation
            probe = ''.join(map(chr, range(len(example_sig))))
            index_spec = [ord(c) for c in js_sig_func(probe)]
            self.cache.store('youtube-sigfuncs', func_id, index_spec)

    def apply_spec(s):
        return ''.join(s[i] for i in index_spec)

    return apply_spec

2725

2726

def _print_sig_code(self, func, example_sig):
    """
    Print Python code equivalent to the signature function `func`

    Does nothing unless the 'youtube_print_sig_code' param is set.
    `func` is called once with a probe string as long as example_sig.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        # Yields 's[...]' index/slice expressions that together cover idxs,
        # merging runs of consecutive indices (step +1 or -1) into slices
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return f's[{starts}{ends}{steps}]'

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the step is unchanged
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit the final element, or the run that is still open
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Each character of the probe is its own position, so func's output lists source indices
    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

2767

2768

def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and wrap it as a Python callable

    @param jscode  Player JavaScript source
    @returns       Function taking the signature string and returning the
                   result of the interpreted JS function
    """
    # NB: parentheses that belong to the JS source must be escaped as \( and \).
    # A bare "$" is an end-of-string anchor and makes the pattern unmatchable
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The interpreted function takes its arguments as a list
    return lambda s: initial_function([s])

2791

2792

def _cached(self, func, *cache_id):
    """
    Wrap func so its outcome is memoized in self._player_cache under cache_id

    Exceptions are memoized too: an ExtractorError is stored as is, any other
    exception is stored wrapped in an ExtractorError. A stored exception is
    re-raised on every call.
    """
    def inner(*args, **kwargs):
        if cache_id not in self._player_cache:
            try:
                outcome = func(*args, **kwargs)
            except ExtractorError as err:
                outcome = err
            except Exception as err:
                outcome = ExtractorError(traceback.format_exc(), cause=err)
            self._player_cache[cache_id] = outcome

        cached = self._player_cache[cache_id]
        if isinstance(cached, Exception):
            raise cached
        return cached

    return inner

def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    sig_layout = self._signature_cache_id(s)
    # One signature function is memoized per (player_url, signature layout)
    cached_extractor = self._cached(
        self._extract_signature_function, 'sig', player_url, sig_layout)
    decrypt = cached_extractor(video_id, player_url, s)
    self._print_sig_code(decrypt, s)
    return decrypt(s)

2815

2816

def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into a working signature

    The n function is first run through the native JS interpreter; if that
    raises JSInterpreter.Exception, PhantomJS is tried instead.
    Raises ExtractorError if player_url is None or the function code cannot be extracted.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        # The extracted function is memoized per player_url
        extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = extract_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # The PhantomJS wrapper could not be created; re-raise the interpreter error
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e, only_once=True)

        # func_code is (argument names, function body)
        args, func_body = func_code
        ret = jsi.execute(
            f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
            video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret

2849

2850

def _extract_n_function_name(self, jscode):
    """
    Return the name of the n-parameter function in the player JS

    When the call site references the function through an array
    (`name[idx](...)`), the array literal is looked up and the idx-th
    entry is returned instead.
    """
    # NB: JS parentheses must be escaped as \( and \); a bare "$" would
    # anchor to the end of the string and never match here
    funcname, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return funcname

    return json.loads(js_to_json(self._search_regex(
        rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]

2860

2861

def _extract_n_function_code(self, video_id, player_url):
    """
    Return (jsi, player_id, func_code) for the player's n function

    func_code is loaded from the 'youtube-nsig' cache section when available;
    otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # func_name may contain "$", so it is escaped before being put into the pattern
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
                 # NB: The end of the regex is intentionally kept strict
                 {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Same shape as the jsinterp result: (argument names, body)
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code

2886

2887

def _extract_n_function_from_code(self, jsi, func_code):
    """
    Build a Python callable for the n function described by func_code

    The callable raises JSInterpreter.Exception for any failure, including
    a result that starts with 'enhanced_except_'.
    """
    js_func = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = js_func([s])
        except JSInterpreter.Exception:
            raise
        except Exception as err:
            # Normalize everything else to the interpreter's exception type
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)

        if not result.startswith('enhanced_except_'):
            return result
        raise JSInterpreter.Exception('Signature function returned an exception')

    return extract_nsig

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of ytcfg is preferred; otherwise the value is searched for
    in the player JS. Without a player_url this warns (or raises when fatal)
    and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))

2927

2928

def _mark_watched(self, video_id, player_responses):
    """
    Request the playback tracking URLs so that the video gets marked as watched

    Both 'videostatsPlaybackUrl' and 'videostatsWatchtimeUrl' are requested, in
    that order; the method stops at the first one that is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        reported_length = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_length) - 1)]

        extra_params = {
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        }
        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            extra_params['st'] = video_length
            extra_params['et'] = video_length
        qs.update(extra_params)

        url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

2968

2969

@classmethod
def _extract_from_webpage(cls, url, webpage):
    """
    Yield url results for YouTube videos referenced by a third-party webpage

    An Invidious-style alternate link wins over everything else and ends the
    extraction; otherwise the parent class results are yielded, followed by
    lazyYT embeds and "YouTube Video Importer" plugin embeds.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for raw_id in lazy_ids:
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_matches:
        # The video id is the last group of each match
        embedded_id = groups[-1]
        yield cls.url_result(embedded_id, cls, embedded_id)

2992

2993

@classmethod
def extract_id(cls, url):
    """Return the id that get_temp_id() finds in url; raise ExtractorError if there is none"""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

2999

3000

def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar renderer found in data"""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )

    def start_seconds(chapter):
        # The start time is given in milliseconds
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    raw_chapters = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        raw_chapters,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)

3014

3015

def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Extract chapters from the macro markers lists of the engagement panels

    Returns the chapters of the first list that yields any, else an empty list.
    """
    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in marker_lists:
        found = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if found:
            return found
    return []

3027

3028

def _extract_chapters_from_description(self, description, duration):
    """Extract chapters from lines of the description that begin with a timestamp"""
    # Each match is a (timestamp, title) tuple
    timestamped_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')

    def chapter_time(match):
        return parse_duration(match[0])

    def chapter_title(match):
        return match[1]

    return self._extract_chapters(
        timestamped_lines,
        chapter_time=chapter_time, chapter_title=chapter_title,
        duration=duration, strict=False)

3033

3034

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

3039

'title': chapter_title(chapter),

3040

} for chapter in chapter_list or []]

3041

if not strict:

3042

chapter_list.sort(key=lambda c: c['start_time'] or 0)

3043

3044

chapters = [{'start_time': 0}]

3045

for idx, chapter in enumerate(chapter_list):

3046

if chapter['start_time'] is None:

3047

self.report_warning(f'Incomplete chapter {idx}')

3048

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

3049

chapters.append(chapter)

3050

elif chapter not in chapters:

3051

self.report_warning(

3052

f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')

3053

return chapters[1:]

3054

3055

def _extract_comment(self, comment_renderer, parent=None):
    """
    Convert a comment renderer into a comment dict

    Returns None when the renderer has no 'commentId'.
    'parent' is set to 'root' when no parent id is given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    body = self._get_text(comment_renderer, 'contentText')

    # Timestamp is an estimate calculated from the current time and time_text
    published_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    estimated_timestamp = self._parse_time_text(published_text)

    author_name = self._get_text(comment_renderer, 'authorText')
    channel_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    vote_getters = (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount'])
    like_count = parse_count(try_get(comment_renderer, vote_getters, str)) or 0

    avatar_url = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
    by_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)

    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    hearted = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': body,
        'timestamp': estimated_timestamp,
        'time_text': published_text,
        'like_count': like_count,
        'is_favorited': hearted,
        'author': author_name,
        'author_id': channel_id,
        'author_thumbnail': avatar_url,
        'author_is_uploader': by_uploader,
        'parent': parent or 'root'
    }

3091

3092

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generator of comment dicts for a comment section or, when parent is given, a reply thread

    @param root_continuation_data  Renderer data the first continuation is extracted from
    @param ytcfg                   Passed on to the API header and request helpers
    @param video_id                Video id, used for forced continuations and warnings
    @param parent                  Id of the parent comment; None for the top-level section
    @param tracker                 Dict of counters shared across the recursive calls
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Returns the continuation of the requested sort order, taken from the
        # first header that provides one; also records the estimated comment count
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields each comment followed by its replies
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield produces None, which the caller treats as "stop"
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Capped by both the per-thread limit and what is left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Missing or unparsable values of the max_comments argument default to sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section of the first response is only used for its header
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                # A falsy entry is the stop signal from extract_thread
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

3237

3238

@staticmethod

3239

def _generate_comment_continuation(video_id):

3240

"""

3241

Generates initial comment section continuation token from given video id

3242

"""

3243

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

3244

return base64.b64encode(token.encode()).decode()

3245

3246

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""

    def _real_comment_extract(contents):
        # Pick the first item section that is marked as the comment section
        comment_section = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                comment_section = item
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    # First value of the max_comments argument; None means no limit for islice
    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, comment_limit)

3256

3257

@staticmethod

3258

def _get_checkok_params():

3259

return {'contentCheckOk': True, 'racyCheckOk': True}

3260

3261

@classmethod

3262

def _generate_player_context(cls, sts=None):

3263

context = {

3264

'html5Preference': 'HTML5_PREF_WANTS',

3265

}

3266

if sts is not None:

3267

context['signatureTimestamp'] = sts

3268

return {

3269

'playbackContext': {

3270

'contentPlaybackContext': context

3271

},

3272

**cls._get_checkok_params()

}

@staticmethod
def _is_agegated(player_response):
    """Return True if the playability status of player_response indicates an age gate"""
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Both the status and the reason text are checked against every marker
    candidates = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for marker in AGE_GATE_REASONS:
        for candidate in candidates:
            if marker in candidate:
                return True
    return False

3286

3287

@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of player_response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3290

3291

# Value sent as 'params' in the player API query for stories and for the android client
_STORY_PLAYER_PARAMS = '8AEB'

3292

3293

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Request the player API response for video_id using the given client

    Returns the response, or None if it is empty.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    # The signature timestamp can only be determined when a player URL is known
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    query = {'videoId': video_id}
    if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
        query['params'] = self._STORY_PLAYER_PARAMS
    query.update(self._generate_player_context(sts))

    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None

3314

3315

def _get_requested_clients(self, url, smuggled_data):
    """
    Determine which innertube clients to use, in order, without duplicates

    Clients come from the 'player_client' extractor argument; 'default' and
    'all' are expanded and unsupported names are skipped with a warning.
    For music URLs the '<client>_music' variant of each requested client is
    appended when such a client exists.
    """
    requested_clients = []
    default = ['android', 'web']
    # Clients starting with "_" cannot be requested explicitly
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy, so that extending the result below cannot modify the defaults
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Iterate over a snapshot: the list must not grow while it is being traversed
        requested_clients.extend(
            f'{client}_music' for client in tuple(requested_clients)
            if f'{client}_music' in INNERTUBE_CLIENTS)

    return orderedSet(requested_clients)

3338

3339

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Fetch a player response from each of `clients` (and any fallbacks).

    Returns a tuple `(player_responses, player_url)`.

    An ExtractorError raised for one client does not abort the others: the
    error is remembered and reported as a warning. Only if no player
    response at all was collected is the last such error raised.
    """
    initial_pr = self._search_json(
        self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response',
        video_id, fatal=False) if webpage else None

    seen_clients = set(clients)
    # Reversed so that pop() hands the clients out in their given order
    clients = clients[::-1]
    collected = []

    def append_client(*client_names):
        """ Queue the first client name that exists and was not used yet """
        for candidate in client_names:
            resolved = _split_innertube_client(candidate)[0]
            if resolved not in INNERTUBE_CLIENTS or resolved in seen_clients:
                continue
            clients.append(candidate)
            seen_clients.add(resolved)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        collected.append({**initial_pr, 'streamingData': None})

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        if not player_url:
            player_url = self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The player URL download is attempted at most once per call
        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                # The webpage already carried the web client's response
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if not pr_video_id or pr_video_id == video_id:
                collected.append(pr)
            else:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not collected:
            raise last_error
        self.report_warning(last_error)
    return collected, player_url

3420

3421

def _needs_live_processing(self, live_status, duration):

3422

if (live_status == 'is_live' and self.get_param('live_from_start')

3423

or live_status == 'post_live' and (duration or 0) > 4 * 3600):

3424

return live_status

3425

3426

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Yield every format found in `streaming_data`, then one subtitles dict.

    This is a generator. The direct (https) formats listed under "formats"
    and "adaptiveFormats" are yielded first, followed by the formats of the
    HLS and DASH manifests of each streaming data entry. The very last item
    yielded is always the subtitles dict merged from those manifests, so the
    caller has to split it off the end.

    `player_url` is required for decrypting signatures and the "n" query
    parameter; formats needing a signature are skipped without it, and
    formats whose "n" parameter could not be decrypted are marked THROTTLED.
    `duration` (seconds, may be None) is used to detect damaged formats.
    """
    # itags maps each collected format id to the protocol it was found with
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null "audioQuality"
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f'         Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f'         n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        if audio_track.get('audioIsDefault'):
            language_preference = 10
        elif 'descriptive' in (audio_track.get('displayName') or '').lower():
            language_preference = -10
        else:
            language_preference = -1
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality may still be None when the format carries no quality information
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            # `or ''` also covers an explicit null audio track id
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, itag):
        """Set the id and quality of manifest format `f`; False if it is a duplicate"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen with another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Unknown itag: borrow the quality of the closest known height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # Always last: the caller splits the subtitles off the end of the stream
    yield subtitles

def _extract_storyboard(self, player_responses, duration):
    """Yield one 'mhtml' storyboard format per entry of the storyboard spec.

    The spec is a '|'-separated string whose first part is the base URL and
    whose remaining parts are '#'-separated entries. Entries that do not
    have exactly 8 fields, or whose first five fields are not non-zero
    integers, are skipped with a warning.

    Nothing is yielded when there is no usable base URL, or when `duration`
    is unknown (None or 0), since fragment durations and fps are computed
    from it.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The spec was reversed above, so pop() takes its first part
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Dividing by/into a missing duration would raise from inside the
        # generator and abort the whole extraction
        return
    last_level = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(last_level - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3667

3668

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and all player responses.

    Returns a tuple `(webpage, master_ytcfg, player_responses, player_url)`.
    `webpage` is None when 'webpage' is listed in the `player_skip`
    extractor argument.
    """
    if 'webpage' in self._configuration_arg('player_skip'):
        webpage = None
    else:
        page_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            page_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=page_query)

    # Fall back to the default ytcfg when none could be extracted from the page
    master_ytcfg = self.extract_ytcfg(video_id, webpage)
    if not master_ytcfg:
        master_ytcfg = self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url

3684

3685

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status and extract the formats of a video.

    Returns a tuple
    `(live_broadcast_details, live_status, streaming_data, formats, subtitles)`.
    `live_status` is one of 'post_live', 'is_live', 'is_upcoming',
    'was_live', 'not_live' or None.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The first matching state wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        # Only "not live" if at least one of the flags was an explicit False
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator yields all formats first and the subtitles dict last
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    return live_broadcast_details, live_status, streaming_data, formats, subtitles

3703

3704

def _real_extract(self, url):

3705

url, smuggled_data = unsmuggle_url(url, {})

3706

video_id = self._match_id(url)

3707

3708

base_url = self.http_scheme() + '//www.youtube.com/'

3709

webpage_url = base_url + 'watch?v=' + video_id

3710

3711

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3712

3713

playability_statuses = traverse_obj(

3714

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3715

3716

trailer_video_id = get_first(

3717

playability_statuses,

3718

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3719

expected_type=str)

3720

if trailer_video_id:

3721

return self.url_result(

3722

trailer_video_id, self.ie_key(), trailer_video_id)

3723

3724

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3725

if webpage else (lambda x: None))

3726

3727

video_details = traverse_obj(

3728

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3729

microformats = traverse_obj(

3730

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3731

expected_type=dict, default=[])

3732

3733

translated_title = self._get_text(microformats, (..., 'title'))

3734

video_title = (self._preferred_lang and translated_title

3735

or get_first(video_details, 'title') # primary

3736

or translated_title

3737

or search_meta(['og:title', 'twitter:title', 'title']))

3738

translated_description = self._get_text(microformats, (..., 'description'))

3739

original_description = get_first(video_details, 'shortDescription')

3740

video_description = (

3741

self._preferred_lang and translated_description

3742

# If original description is blank, it will be an empty string.

3743

# Do not prefer translated description in this case.

3744

or original_description if original_description is not None else translated_description)

3745

3746

multifeed_metadata_list = get_first(

3747

player_responses,

3748

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3749

expected_type=str)

3750

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3751

if self.get_param('noplaylist'):

3752

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3757

# Unquote should take place before split on comma (,) since textual

3758

# fields may contain comma as well (see

3759

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3760

feed_data = urllib.parse.parse_qs(

3761

urllib.parse.unquote_plus(feed))

3762

3763

def feed_entry(name):

3764

return try_get(

3765

feed_data, lambda x: x[name][0], str)

3766

3767

feed_id = feed_entry('id')

3768

if not feed_id:

3769

continue

3770

feed_title = feed_entry('title')

3771

title = video_title

3772

if feed_title:

3773

title += ' (%s)' % feed_title

3774

entries.append({

3775

'_type': 'url_transparent',

3776

'ie_key': 'Youtube',

3777

'url': smuggle_url(

3778

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3779

{'force_singlefeed': True}),

3780

'title': title,

3781

})

3782

feed_ids.append(feed_id)

3783

self.to_screen(

3784

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3785

% (', '.join(feed_ids), video_id))

3786

return self.playlist_result(

3787

entries, video_id, video_title, video_description)

3788

3789

duration = int_or_none(

3790

get_first(video_details, 'lengthSeconds')

3791

or get_first(microformats, 'lengthSeconds')

3792

or parse_duration(search_meta('duration'))) or None

3793

3794

live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \

3795

self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

3796

if live_status == 'post_live':

3797

self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

3798

3799

if not formats:

3800

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3801

self.report_drm(video_id)

3802

pemr = get_first(

3803

playability_statuses,

3804

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3805

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3806

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3807

if subreason:

3808

if subreason == 'The uploader has not made this video available in your country.':

3809

countries = get_first(microformats, 'availableCountries')

3810

if not countries:

3811

regions_allowed = search_meta('regionsAllowed')

3812

countries = regions_allowed.split(',') if regions_allowed else None

3813

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3814

reason += f'. {subreason}'

3815

if reason:

3816

self.raise_no_formats(reason, expected=True)

3817

3818

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3819

if not keywords and webpage:

3820

keywords = [

3821

unescapeHTML(m.group('content'))

3822

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3823

for keyword in keywords:

3824

if keyword.startswith('yt:stretch='):

3825

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3826

if mobj:

3827

# NB: float is intentional for forcing float division

3828

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3833

f['stretched_ratio'] = ratio

3834

break

3835

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3836

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3837

if thumbnail_url:

3838

thumbnails.append({

3839

'url': thumbnail_url,

3840

})

3841

original_thumbnails = thumbnails.copy()

3842

3843

# The best resolution thumbnails sometimes does not appear in the webpage

3844

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3845

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3846

thumbnail_names = [

3847

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

3848

# in resolution, these are not the custom thumbnail. So de-prioritize them

3849

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3850

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3851

]

3852

n_thumbnail_names = len(thumbnail_names)

3853

thumbnails.extend({

3854

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3855

video_id=video_id, name=name, ext=ext,

3856

webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),

3857

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3858

for thumb in thumbnails:

3859

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3860

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3861

self._remove_duplicate_formats(thumbnails)

3862

self._downloader._sort_thumbnails(original_thumbnails)

3863

3864

category = get_first(microformats, 'category') or search_meta('genre')

3865

channel_id = str_or_none(

3866

get_first(video_details, 'channelId')

3867

or get_first(microformats, 'externalChannelId')

3868

or search_meta('channelId'))

3869

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3870

3871

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3872

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3873

if not duration and live_end_time and live_start_time:

3874

duration = live_end_time - live_start_time

3875

3876

needs_live_processing = self._needs_live_processing(live_status, duration)

3877

3878

def is_bad_format(fmt):

3879

if needs_live_processing and not fmt.get('is_from_start'):

3880

return True

3881

elif (live_status == 'is_live' and needs_live_processing != 'is_live'

3882

and fmt.get('protocol') == 'http_dash_segments'):

3883

return True

3884

3885

for fmt in filter(is_bad_format, formats):

3886

fmt['preference'] = (fmt.get('preference') or -1) - 10

3887

fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')

3888

3889

if needs_live_processing:

3890

self._prepare_live_from_start_formats(

3891

formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

3892

3893

formats.extend(self._extract_storyboard(player_responses, duration))

3894

3895

# source_preference is lower for throttled/potentially damaged formats

3896

self._sort_formats(formats, (

3897

'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3902

'formats': formats,

3903

'thumbnails': thumbnails,

3904

# The best thumbnail that we are sure exists. Prevents unnecessary

3905

# URL checking if user don't care about getting the best possible thumbnail

3906

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3907

'description': video_description,

3908

'uploader': get_first(video_details, 'author'),

3909

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3910

'uploader_url': owner_profile_url,

3911

'channel_id': channel_id,

3912

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

3913

'duration': duration,

3914

'view_count': int_or_none(

3915

get_first((video_details, microformats), (..., 'viewCount'))

3916

or search_meta('interactionCount')),

3917

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3918

'age_limit': 18 if (

3919

get_first(microformats, 'isFamilySafe') is False

3920

or search_meta('isFamilyFriendly') == 'false'

3921

or search_meta('og:restrictions:age') == '18+') else 0,

3922

'webpage_url': webpage_url,

3923

'categories': [category] if category else None,

3924

'tags': keywords,

3925

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3926

'live_status': live_status,

3927

'release_timestamp': live_start_time,

}

subtitles = {}

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3932

if pctr:

3933

def get_lang_code(track):

3934

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3935

or track.get('languageCode'))

3936

3937

# Converted into dicts to remove duplicates

3938

captions = {

3939

get_lang_code(sub): sub

3940

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3941

translation_languages = {

3942

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3943

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3944

3945

def process_language(container, base_url, lang_code, sub_name, query):

3946

lang_subs = container.setdefault(lang_code, [])

3947

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

# NB: Constructing the full subtitle dictionary is slow

3958

get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (

3959

self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

3960

for lang_code, caption_track in captions.items():

3961

base_url = caption_track.get('baseUrl')

3962

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3963

if not base_url:

3964

continue

3965

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3966

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3971

if not caption_track.get('isTranslatable'):

3972

continue

3973

for trans_code, trans_name in translation_languages.items():

3974

if not trans_code:

3975

continue

3976

orig_trans_code = trans_code

3977

if caption_track.get('kind') != 'asr':

3978

if not get_translated_subs:

3979

continue

3980

trans_code += f'-{lang_code}'

3981

trans_name += format_field(lang_name, None, ' from %s')

3982

# Add an "-orig" label to the original language so that it can be distinguished.

3983

# The subs are returned without "-orig" as well for compatibility

3984

if lang_code == f'a-{orig_trans_code}':

3985

process_language(

3986

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3987

# Setting tlang=lang returns damaged subtitles.

3988

process_language(automatic_captions, base_url, trans_code, trans_name,

3989

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3990

3991

info['automatic_captions'] = automatic_captions

3992

info['subtitles'] = subtitles

3993

3994

parsed_url = urllib.parse.urlparse(url)

3995

for component in [parsed_url.fragment, parsed_url.query]:

3996

query = urllib.parse.parse_qs(component)

3997

for k, v in query.items():

3998

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3999

d_k += '_time'

4000

if d_k not in info and k in s_ks:

4001

info[d_k] = parse_duration(query[k][0])

4002

4003

# Youtube Music Auto-generated description

4004

if video_description:

4005

mobj = re.search(

4006

r'''(?xs)

4007

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

4008

(?P<album>[^\n]+)

4009

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

4010

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

4011

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

4012

.+\nAuto-generated\ by\ YouTube\.\s*$

4013

''', video_description)

4014

if mobj:

4015

release_year = mobj.group('release_year')

4016

release_date = mobj.group('release_date')

4017

if release_date:

4018

release_date = release_date.replace('-', '')

4019

if not release_year:

4020

release_year = release_date[:4]

4021

info.update({

4022

'album': mobj.group('album'.strip()),

4023

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

4024

'track': mobj.group('track').strip(),

4025

'release_date': release_date,

4026

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

4032

if not initial_data:

4033

query = {'videoId': video_id}

4034

query.update(self._get_checkok_params())

4035

initial_data = self._extract_response(

4036

item_id=video_id, ep='next', fatal=False,

4037

ytcfg=master_ytcfg, query=query,

4038

headers=self.generate_api_headers(ytcfg=master_ytcfg),

4039

note='Downloading initial data API JSON')

4040

4041

info['comment_count'] = traverse_obj(initial_data, (

4042

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

4043

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

4044

), (

4045

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

4046

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

4047

), expected_type=int_or_none, get_all=False)

4048

4049

try: # This will error if there is no livechat

4050

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

4051

except (KeyError, IndexError, TypeError):

4052

pass

4053

else:

4054

info.setdefault('subtitles', {})['live_chat'] = [{

4055

# url is needed to set cookies

4056

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

4057

'video_id': video_id,

4058

'ext': 'json',

4059

'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')

4060

else 'youtube_live_chat_replay'),

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

4066

or self._extract_chapters_from_engagement_panel(initial_data, duration)

4067

or self._extract_chapters_from_description(video_description, duration)

4068

or None)

4069

4070

contents = traverse_obj(

4071

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

4072

expected_type=list, default=[])

4073

4074

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

4075

if vpir:

4076

stl = vpir.get('superTitleLink')

4077

if stl:

4078

stl = self._get_text(stl)

4079

if try_get(

4080

vpir,

4081

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

4082

info['location'] = stl

4083

else:

4084

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

4085

if mobj:

4086

info.update({

4087

'series': mobj.group(1),

4088

'season_number': int(mobj.group(2)),

4089

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

list) or []):

tbrs = variadic(

traverse_obj(

tlb, 'toggleButtonRenderer',

4098

('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),

4099

default=[]))

4100

for tbr in tbrs:

4101

for getter, regex in [(

4102

lambda x: x['defaultText']['accessibility']['accessibilityData'],

4103

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

4104

lambda x: x['accessibility'],

4105

lambda x: x['accessibilityData']['accessibilityData'],

4106

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

4107

label = (try_get(tbr, getter, dict) or {}).get('label')

4108

if label:

4109

mobj = re.match(regex, label)

4110

if mobj:

4111

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

4112

break

4113

sbr_tooltip = try_get(

4114

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

4115

if sbr_tooltip:

4116

like_count, dislike_count = sbr_tooltip.split(' / ')

4117

info.update({

4118

'like_count': str_to_int(like_count),

4119

'dislike_count': str_to_int(dislike_count),

4120

})

4121

vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))

4122

if vcr:

4123

vc = self._get_count(vcr, 'viewCount')

4124

# Upcoming premieres with waiting count are treated as live here

4125

if vcr.get('isLive'):

4126

info['concurrent_view_count'] = vc

4127

elif info.get('view_count') is None:

4128

info['view_count'] = vc

4129

4130

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

4131

if vsir:

4132

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

4133

info.update({

4134

'channel': self._get_text(vor, 'title'),

4135

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

4140

list) or []

4141

multiple_songs = False

4142

for row in rows:

4143

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

4144

multiple_songs = True

4145

break

4146

for row in rows:

4147

mrr = row.get('metadataRowRenderer') or {}

4148

mrr_title = mrr.get('title')

4149

if not mrr_title:

4150

continue

4151

mrr_title = self._get_text(mrr, 'title')

4152

mrr_contents_text = self._get_text(mrr, ('contents', 0))

4153

if mrr_title == 'License':

4154

info['license'] = mrr_contents_text

4155

elif not multiple_songs:

4156

if mrr_title == 'Album':

4157

info['album'] = mrr_contents_text

4158

elif mrr_title == 'Artist':

4159

info['artist'] = mrr_contents_text

4160

elif mrr_title == 'Song':

4161

info['track'] = mrr_contents_text

4162

4163

fallbacks = {

4164

'channel': 'uploader',

4165

'channel_id': 'uploader_id',

4166

'channel_url': 'uploader_url',

4167

}

4168

4169

# The upload date for scheduled, live and past live streams / premieres in microformats

4170

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

4171

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

4172

upload_date = (

4173

unified_strdate(get_first(microformats, 'uploadDate'))

4174

or unified_strdate(search_meta('uploadDate')))

4175

if not upload_date or (

4176

live_status in ('not_live', None)

4177

and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])

4178

):

4179

upload_date = strftime_or_none(

4180

self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date

4181

info['upload_date'] = upload_date

4182

4183

for to, frm in fallbacks.items():

4184

if not info.get(to):

4185

info[to] = info.get(frm)

4186

4187

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))

4193

4194

is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)

4195

or get_first(video_details, 'isPrivate', expected_type=bool))

4196

4197

info['availability'] = (

4198

'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)

4199

else self._availability(

4200

is_private=is_private,

4201

needs_premium=(

4202

self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)

4203

or False if initial_data and is_private is not None else None),

4204

needs_subscription=(

4205

self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)

4206

or False if initial_data and is_private is not None else None),

4207

needs_auth=info['age_limit'] >= 18,

4208

is_unlisted=None if is_private is None else (

4209

self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)

4210

or get_first(microformats, 'isUnlisted', expected_type=bool))))

4211

4212

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

4213

4214

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

4220

4221

@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method so that smuggled data is passed through.

    The wrapped method is invoked as `func(self, url, smuggled_data)` with the
    URL already unsmuggled. `is_music_url` is set in the smuggled data when
    `self.is_music_url(url)` is true. When any smuggled data is present, the
    URL of every entry of the result is smuggled with that same data.
    """

    def _resmuggle(entries, payload):
        for item in entries:
            # TODO: Convert URL to music.youtube instead.
            # Do we need to passthrough any other smuggled_data?
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, payload = unsmuggle_url(url, {})
        if self.is_music_url(url):
            payload['is_music_url'] = True

        result = func(self, url, payload)
        if not payload:
            return result

        entries = result.get('entries')
        if entries:
            # The entries are wrapped lazily; nothing is consumed here
            result['entries'] = _resmuggle(entries, payload)
        return result

    return wrapper

def _extract_channel_id(self, webpage):
    """Return the channel ID found in the meta tags of `webpage`.

    The `channelId` meta tag is tried first. If it is absent, the ID is
    parsed from the `/channel/<id>` part of a channel URL taken from one of
    the known URL meta tags.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: with `([^/?#&])+` the group
    # is repeated and only its last repetition (one character) is captured
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')

4253

4254

@staticmethod

4255

def _extract_basic_item_renderer(item):

4256

# Modified from _extract_grid_item_renderer

4257

known_basic_renderers = (

4258

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

4259

)

4260

for key, renderer in item.items():

4261

if not isinstance(renderer, dict):

4262

continue

4263

elif key in known_basic_renderers:

4264

return renderer

4265

elif key.startswith('grid') and key.endswith('Renderer'):

4266

return renderer

4267

4268

def _grid_entries(self, grid_renderer):

4269

for item in grid_renderer['items']:

4270

if not isinstance(item, dict):

4271

continue

4272

renderer = self._extract_basic_item_renderer(item)

4273

if not isinstance(renderer, dict):

4274

continue

4275

title = self._get_text(renderer, 'title')

4276

4277

# playlist

4278

playlist_id = renderer.get('playlistId')

4279

if playlist_id:

4280

yield self.url_result(

4281

'https://www.youtube.com/playlist?list=%s' % playlist_id,

4282

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

4287

if video_id:

4288

yield self._extract_video(renderer)

4289

continue

4290

# channel

4291

channel_id = renderer.get('channelId')

4292

if channel_id:

4293

yield self.url_result(

4294

'https://www.youtube.com/channel/%s' % channel_id,

4295

ie=YoutubeTabIE.ie_key(), video_title=title)

4296

continue

4297

# generic endpoint URL support

4298

ep_url = urljoin('https://www.youtube.com/', try_get(

4299

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

4300

str))

4301

if ep_url:

4302

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

4303

if ie.suitable(ep_url):

4304

yield self.url_result(

4305

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

4306

break

4307

4308

def _music_reponsive_list_entry(self, renderer):
    """Return a music.youtube.com URL result for a list item renderer.

    The playlist item's video ID is preferred, then the watch endpoint's
    playlist (with its video, if any), then the browse endpoint. Returns
    None if none of these IDs is present.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={item_video_id}',
            ie=YoutubeIE.ie_key(), video_id=item_video_id)

    def endpoint_value(*path):
        return traverse_obj(renderer, ('navigationEndpoint', *path))

    list_id = endpoint_value('watchEndpoint', 'playlistId')
    if list_id:
        watch_id = endpoint_value('watchEndpoint', 'videoId')
        if watch_id:
            target = f'https://music.youtube.com/watch?v={watch_id}&list={list_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={list_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=list_id)

    browse_id = endpoint_value('browseEndpoint', 'browseId')
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)

4325

4326

def _shelf_entries_from_content(self, shelf_renderer):

4327

content = shelf_renderer.get('content')

4328

if not isinstance(content, dict):

4329

return

4330

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

4331

if renderer:

4332

# TODO: add support for nested playlists so each shelf is processed

4333

# as separate playlist

4334

# TODO: this includes only first N items

4335

yield from self._grid_entries(renderer)

4336

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's endpoint, then the shelf's own content entries.

    With `skip_channels` set, a shelf whose URL contains `/channels?`
    yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

4356

4357

def _playlist_entries(self, video_list_renderer):

4358

for content in video_list_renderer['contents']:

4359

if not isinstance(content, dict):

4360

continue

4361

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

4362

if not isinstance(renderer, dict):

4363

continue

4364

video_id = renderer.get('videoId')

4365

if not video_id:

4366

continue

4367

yield self._extract_video(renderer)

4368

4369

def _rich_entries(self, rich_grid_renderer):
    """Yield the extracted video of a rich item's content, if it has a video ID.

    The content's `videoRenderer` is used if available, otherwise its
    `reelItemRenderer`.
    """
    video = traverse_obj(
        rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False)
    if video and video.get('videoId'):
        yield self._extract_video(video)

4376

4377

def _video_entry(self, video_renderer):

4378

video_id = video_renderer.get('videoId')

4379

if video_id:

4380

return self._extract_video(video_renderer)

4381

4382

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for the tile's tap command URL, or None if there is no URL."""
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_path)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4388

4389

def _post_thread_entries(self, post_thread_renderer):
    """Yield entries for everything a backstage post links to.

    In order: the attached video, the attached playlist, and every inline
    link in the post text that YoutubeIE can handle (except a link to the
    attached video itself).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_id = attached_video.get('videoId')
    if attached_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_list_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if attached_list_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_list_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_list_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not (link and YoutubeIE.suitable(link)):
            continue
        linked_id = YoutubeIE._match_id(link)
        # A link to the attached video would only duplicate its entry
        if linked_id != attached_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)

4424

4425

def _post_thread_continuation_entries(self, post_thread_continuation):

4426

contents = post_thread_continuation.get('contents')

4427

if not isinstance(contents, list):

4428

return

4429

for content in contents:

4430

renderer = content.get('backstagePostThreadRenderer')

4431

if isinstance(renderer, dict):

4432

yield from self._post_thread_entries(renderer)

4433

continue

4434

renderer = content.get('videoRenderer')

4435

if isinstance(renderer, dict):

4436

yield self._video_entry(renderer)

4437

4438

r''' # unused

4439

def _rich_grid_entries(self, contents):

4440

for content in contents:

4441

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4442

if video_renderer:

4443

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _report_history_entries(self, renderer):
    """Yield a YoutubeIE URL result for every linked text run in the report history table rows."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for link in traverse_obj(renderer, link_path):
        yield self.url_result(urljoin('https://www.youtube.com', link), YoutubeIE)

4454

4455

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield all entries found in `parent_renderer['contents']`.

    The continuation for the next page is reported through
    `continuation_list`, whose content is replaced in-place by a single
    element: the continuation found last, or None.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for section in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(section, dict):
            continue
        container = traverse_obj(
            section, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)

        if not container:
            if section.get('richItemRenderer'):
                yield from self._rich_entries(section['richItemRenderer'])
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif section.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(section, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for child in try_get(container, lambda x: x['contents'], list) or []:
            if not isinstance(child, dict):
                continue
            # Only the first known renderer of each child is processed
            match = next(((k, v) for k, v in child.items() if k in handlers), None)
            if match is None:
                continue
            key, payload = match
            # Handlers may produce None for unusable items; drop those
            yield from filter(None, handlers[key](payload))
            continuation_list[0] = self._extract_continuation(payload)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(container)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4507

4508

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield the entries of `tab`, following continuations page by page.

    The entries embedded in the tab content are yielded first. After that,
    each continuation is requested and dispatched on the keys of the first
    continuation item, until there is no continuation, no response, or a
    response with no known renderer.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Looks up `continuation_list` at call time, so it always writes to
        # the list most recently bound below
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return

    parent_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
    if not parent_renderer:
        parent_renderer = try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {}
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Maps a key of the first continuation item to (extraction function, key
    # under which the continuation items are passed to it; None = pass as-is)
    continuation_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }

    page_numbers = itertools.count(1)
    while continuation:
        page_num = next(page_numbers)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation,
            headers=self.generate_api_headers(
                ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data),
            ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        first_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in first_item:
            handler = continuation_handlers.get(key)
            if handler is None:
                continue
            func, parent_key = handler
            if parent_key:
                video_items_renderer = {parent_key: continuation_items}
            else:
                video_items_renderer = continuation_items
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        if not video_items_renderer:
            break

@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the renderer of the tab whose `selected` flag is True.

    If there is none, raise ExtractorError when `fatal` is set and return
    None otherwise.
    """
    tab_renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next((r for r in tab_renderers if r.get('selected') is True), None)
    if selected is None and fatal:
        raise ExtractorError('Unable to find selected tab')
    return selected

4580

4581

def _extract_uploader(self, data):
    """Return the uploader fields taken from the secondary playlist sidebar.

    The result holds the non-empty ones of `uploader`, `uploader_id` and
    `uploader_url`, read from the first run of the video owner's title.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner_run = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)

    details = {}
    if owner_run:
        owner_text = owner_run.get('text')
        details.update({
            # Falls back to the full text when it is not of the form "by X and N others"
            'uploader': self._search_regex(
                r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'uploader_id': try_get(
                owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
            'uploader_url': urljoin(
                'https://www.youtube.com/',
                try_get(owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
        })
    return filter_dict(details)

4596

4597

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab in `tabs`.

    Metadata comes from the channel metadata renderer if present, otherwise
    from the playlist metadata renderer, and from the primary playlist
    sidebar. The entries are produced by `_entries` for the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name, channel_url, channel_id = (
            renderer.get(key) for key in ('title', 'channelUrl', 'externalId'))
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    def add_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    add_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    add_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix = self._parse_time_text(self._get_text(playlist_stats, 2))

    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # The name of the selected tab is always appended to the title
    title = (
        title
        + format_field(selected_tab, 'title', ' - %s')
        + format_field(selected_tab, 'expandedText', ' - %s'))

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly just updated) uploader fields
    for channel_key, uploader_key in (
            ('channel', 'uploader'), ('channel_id', 'uploader_id'), ('channel_url', 'uploader_url')):
        metadata[channel_key] = metadata[uploader_key]

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4688

4689

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline playlist, fetching further pages from the `next` endpoint.

    @param playlist: playlist renderer the first page of entries is read from
    @param playlist_id: value sent as `playlistId` in every follow-up request
    @param data: initial response; passed on for account sync ID and visitor data lookup
    @param ytcfg: ytcfg used to build the API headers and requests
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video already yielded, if this page contains it
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last entry may have no usable watch endpoint. Fall back to an empty dict
        # so that the defaults in `query` apply instead of raising AttributeError
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

4719

4720

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for `playlist`: a redirect to YoutubeTabIE when the playlist
    links to a different, viewable page, otherwise a playlist of its inline entries.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, endpoint_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not should_delegate:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)

4742

4743

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    header_privacy = traverse_obj(
        data, ('header', 'playlistHeaderRenderer', 'privacy'), expected_type=str)
    badges = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_icon = traverse_obj(
        sidebar, (
            'privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
            lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        get_all=False, expected_type=str)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or header_privacy == 'PUBLIC'
            or dropdown_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def privacy_flag(badge_type, header_value, icon_value):
        # NOTE(review): the badge is only consulted when the header privacy is known;
        # without it the dropdown icon alone decides. Confirm that this is intended
        if header_privacy is not None:
            return self._has_badge(badges, badge_type) or header_privacy == header_value
        if dropdown_icon is not None:
            return dropdown_icon == icon_value
        return None

    return self._availability(
        is_private=privacy_flag(BadgeType.AVAILABILITY_PRIVATE, 'PRIVATE', 'PRIVACY_PRIVATE'),
        is_unlisted=privacy_flag(BadgeType.AVAILABILITY_UNLISTED, 'UNLISTED', 'PRIVACY_UNLISTED'),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` value of type `expected_type`
    found among the playlist sidebar items of `data`, or None if there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    # Lazily evaluated, so lookup stops at the first match
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when `data` has no primary sidebar info renderer.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    # Browse endpoint of the "show unavailable videos" menu entry. Stays empty if
    # there is no such entry, in which case the defaults in `query` are used
    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_item = entry.get('menuNavigationItemRenderer')
        label = try_get(nav_item, lambda x: x['text']['simpleText'], str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4825

4826

@functools.cached_property
def skip_webpage(self):
    """Cached property: whether 'webpage' is among the `skip` extractor arguments of YoutubeTabIE"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped

4829

4830

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying through RetryManager.

    Network errors other than HTTP 403/429 are retried, as is initial data that has
    none of 'contents', 'currentVideoEndpoint' and 'onResponseReceivedActions'.
    Any other ExtractorError is passed to _error_or_warning and ends the attempts.

    @returns (webpage, data); either may still be None if the attempts failed
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            is_retryable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, urllib.error.HTTPError) or cause.code not in (403, 429))
            if is_retryable:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            continue
        retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when `ytcfg` is available or the session is not authenticated.
    Otherwise raises ExtractorError if `fatal` is set and 'authcheck' is not among the
    `skip` extractor arguments of YoutubeTabIE, and only reports a warning if not.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_enabled = 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if authcheck_enabled and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

4869

4870

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data for `url`, from the webpage if possible and from the API otherwise.

    The webpage is skipped when `skip_webpage` is set. If no data was obtained
    from it, the data is requested through _extract_tab_endpoint instead.

    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Detect a redirect to the home page that was not explicitly requested
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4889

4890

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the `navigation/resolve_url` API and fetch the resolved endpoint.

    A resolved 'browseEndpoint' is requested from `browse`, a 'watchEndpoint' from `next`.
    If neither is present, raises ExtractorError when `fatal` is set; otherwise
    reports a warning and returns None.
    """
    api_headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=api_headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=api_headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)

4907

4908

# Fallback for the `params` argument of _search_results(); a falsy value means no 'params' key is sent
_SEARCH_PARAMS = None

4909

4910

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries found for the search `query`, requesting page after page.

    @param params: value for the 'params' key of the request; defaults to _SEARCH_PARAMS
                   and is left out of the request if falsy
    @param default_client: client used for the ytcfg download, API headers and requests
    """
    request_data = {'query': query}
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    if params:
        request_data['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the paths above
    check_get_keys = tuple({keys[0] for keys in content_keys})
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-slot list handed to _extract_entries; its first item is merged into
    # the next request and an empty slot ends the paging
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        request_data.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=request_data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        page = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page, continuation_list)
        if not continuation_list[0]:
            break

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4945

IE_DESC = 'YouTube Tabs'

4946

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4955

(?P<not_channel>

4956

feed/|hashtag/|

4957

(?:playlist|watch)\?.*?\blist=

4958

)|

4959

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4964

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4965

}

4966

IE_NAME = 'youtube:tab'

4967

4968

_TESTS = [{

4969

'note': 'playlists, multipage',

4970

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4971

'playlist_mincount': 94,

4972

'info_dict': {

4973

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4974

'title': 'Igor Kleiner - Playlists',

4975

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4976

'uploader': 'Igor Kleiner',

4977

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4978

'channel': 'Igor Kleiner',

4979

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4980

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4981

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4982

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4983

'channel_follower_count': int

4984

},

4985

}, {

4986

'note': 'playlists, multipage, different order',

4987

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4988

'playlist_mincount': 94,

4989

'info_dict': {

4990

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4991

'title': 'Igor Kleiner - Playlists',

4992

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4993

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4994

'uploader': 'Igor Kleiner',

4995

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4996

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4997

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4998

'channel': 'Igor Kleiner',

4999

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5000

'channel_follower_count': int

5001

},

5002

}, {

5003

'note': 'playlists, series',

5004

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

5005

'playlist_mincount': 5,

5006

'info_dict': {

5007

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5008

'title': '3Blue1Brown - Playlists',

5009

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5010

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5011

'uploader': '3Blue1Brown',

5012

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5013

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5014

'channel': '3Blue1Brown',

5015

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5016

'tags': ['Mathematics'],

5017

'channel_follower_count': int

5018

},

5019

}, {

5020

'note': 'playlists, singlepage',

5021

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

5022

'playlist_mincount': 4,

5023

'info_dict': {

5024

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5025

'title': 'ThirstForScience - Playlists',

5026

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

5027

'uploader': 'ThirstForScience',

5028

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5029

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5030

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5031

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5032

'tags': 'count:13',

5033

'channel': 'ThirstForScience',

5034

'channel_follower_count': int

5035

}

5036

}, {

5037

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

5038

'only_matching': True,

5039

}, {

5040

'note': 'basic, single video playlist',

5041

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5042

'info_dict': {

5043

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5044

'uploader': 'Sergey M.',

5045

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5046

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

5051

'channel': 'Sergey M.',

5052

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5053

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5054

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5055

'availability': 'public',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

5060

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5061

'info_dict': {

5062

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5063

'uploader': 'Sergey M.',

5064

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5065

'title': 'youtube-dl empty playlist',

5066

'tags': [],

5067

'channel': 'Sergey M.',

5068

'description': '',

5069

'modified_date': '20160902',

5070

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5071

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5072

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5073

'availability': 'public',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

5079

'info_dict': {

5080

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5081

'title': 'lex will - Home',

5082

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5083

'uploader': 'lex will',

5084

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5085

'channel': 'lex will',

5086

'tags': ['bible', 'history', 'prophesy'],

5087

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5088

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5089

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5090

'channel_follower_count': int

5091

},

5092

'playlist_mincount': 2,

5093

}, {

5094

'note': 'Videos tab',

5095

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

5096

'info_dict': {

5097

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5098

'title': 'lex will - Videos',

5099

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5100

'uploader': 'lex will',

5101

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5102

'tags': ['bible', 'history', 'prophesy'],

5103

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5104

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5105

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5106

'channel': 'lex will',

5107

'channel_follower_count': int

5108

},

5109

'playlist_mincount': 975,

5110

}, {

5111

'note': 'Videos tab, sorted by popular',

5112

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

5113

'info_dict': {

5114

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5115

'title': 'lex will - Videos',

5116

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5117

'uploader': 'lex will',

5118

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5119

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5120

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5121

'channel': 'lex will',

5122

'tags': ['bible', 'history', 'prophesy'],

5123

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5124

'channel_follower_count': int

5125

},

5126

'playlist_mincount': 199,

5127

}, {

5128

'note': 'Playlists tab',

5129

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

5130

'info_dict': {

5131

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5132

'title': 'lex will - Playlists',

5133

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5134

'uploader': 'lex will',

5135

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5136

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5137

'channel': 'lex will',

5138

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5139

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5140

'tags': ['bible', 'history', 'prophesy'],

5141

'channel_follower_count': int

5142

},

5143

'playlist_mincount': 17,

5144

}, {

5145

'note': 'Community tab',

5146

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

5147

'info_dict': {

5148

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5149

'title': 'lex will - Community',

5150

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5151

'uploader': 'lex will',

5152

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5153

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5154

'channel': 'lex will',

5155

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5156

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5157

'tags': ['bible', 'history', 'prophesy'],

5158

'channel_follower_count': int

5159

},

5160

'playlist_mincount': 18,

5161

}, {

5162

'note': 'Channels tab',

5163

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

5164

'info_dict': {

5165

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5166

'title': 'lex will - Channels',

5167

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5168

'uploader': 'lex will',

5169

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5170

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5171

'channel': 'lex will',

5172

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5173

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5174

'tags': ['bible', 'history', 'prophesy'],

5175

'channel_follower_count': int

5176

},

5177

'playlist_mincount': 12,

5178

}, {

5179

'note': 'Search tab',

5180

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

5181

'playlist_mincount': 40,

5182

'info_dict': {

5183

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5184

'title': '3Blue1Brown - Search - linear algebra',

5185

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5186

'uploader': '3Blue1Brown',

5187

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5188

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5189

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5190

'tags': ['Mathematics'],

5191

'channel': '3Blue1Brown',

5192

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5193

'channel_follower_count': int

5194

},

5195

}, {

5196

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5197

'only_matching': True,

5198

}, {

5199

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5200

'only_matching': True,

5201

}, {

5202

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5203

'only_matching': True,

5204

}, {

5205

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

5206

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5207

'info_dict': {

5208

'title': '29C3: Not my department',

5209

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5210

'uploader': 'Christiaan008',

5211

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5212

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

5213

'tags': [],

5214

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5215

'view_count': int,

5216

'modified_date': '20150605',

5217

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5218

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5219

'channel': 'Christiaan008',

5220

'availability': 'public',

5221

},

5222

'playlist_count': 96,

5223

}, {

5224

'note': 'Large playlist',

5225

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

5226

'info_dict': {

5227

'title': 'Uploads from Cauchemar',

5228

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

5229

'uploader': 'Cauchemar',

5230

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5231

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

5232

'tags': [],

5233

'modified_date': r're:\d{8}',

5234

'channel': 'Cauchemar',

5235

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

5236

'view_count': int,

5237

'description': '',

5238

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5239

'availability': 'public',

5240

},

5241

'playlist_mincount': 1123,

5242

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5243

}, {

5244

'note': 'even larger playlist, 8832 videos',

5245

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

5246

'only_matching': True,

5247

}, {

5248

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

5249

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

5250

'info_dict': {

5251

'title': 'Uploads from Interstellar Movie',

5252

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

5253

'uploader': 'Interstellar Movie',

5254

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5255

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

5256

'tags': [],

5257

'view_count': int,

5258

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5259

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

5260

'channel': 'Interstellar Movie',

5261

'description': '',

5262

'modified_date': r're:\d{8}',

5263

'availability': 'public',

5264

},

5265

'playlist_mincount': 21,

5266

}, {

5267

'note': 'Playlist with "show unavailable videos" button',

5268

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

5269

'info_dict': {

5270

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

5271

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

5272

'uploader': 'Phim Siêu Nhân Nhật Bản',

5273

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5274

'view_count': int,

5275

'channel': 'Phim Siêu Nhân Nhật Bản',

5276

'tags': [],

5277

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5278

'description': '',

5279

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5280

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5281

'modified_date': r're:\d{8}',

5282

'availability': 'public',

5283

},

5284

'playlist_mincount': 200,

5285

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5286

}, {

5287

'note': 'Playlist with unavailable videos in page 7',

5288

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

5289

'info_dict': {

5290

'title': 'Uploads from BlankTV',

5291

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

5292

'uploader': 'BlankTV',

5293

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5294

'channel': 'BlankTV',

5295

'channel_url': 'https://www.youtube.com/c/blanktv',

5296

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5297

'view_count': int,

5298

'tags': [],

5299

'uploader_url': 'https://www.youtube.com/c/blanktv',

5300

'modified_date': r're:\d{8}',

5301

'description': '',

5302

'availability': 'public',

5303

},

5304

'playlist_mincount': 1000,

5305

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5306

}, {

5307

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

5308

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5309

'info_dict': {

5310

'title': 'Data Analysis with Dr Mike Pound',

5311

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5312

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5313

'uploader': 'Computerphile',

5314

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

5315

'uploader_url': 'https://www.youtube.com/user/Computerphile',

5316

'tags': [],

5317

'view_count': int,

5318

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5319

'channel_url': 'https://www.youtube.com/user/Computerphile',

5320

'channel': 'Computerphile',

5321

'availability': 'public',

5322

},

5323

'playlist_mincount': 11,

5324

}, {

5325

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5326

'only_matching': True,

5327

}, {

5328

'note': 'Playlist URL that does not actually serve a playlist',

5329

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

5334

'uploader': 'STREEM',

5335

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

5336

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

5337

'upload_date': '20150526',

5338

'license': 'Standard YouTube License',

5339

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

5340

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5347

},

5348

'skip': 'This video is not available.',

5349

'add_ie': [YoutubeIE.ie_key()],

5350

}, {

5351

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5352

'only_matching': True,

5353

}, {

5354

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5355

'only_matching': True,

5356

}, {

5357

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5358

'info_dict': {

5359

'id': 'Wq15eF5vCbI', # This will keep changing

5360

'ext': 'mp4',

5361

'title': str,

5362

'uploader': 'Sky News',

5363

'uploader_id': 'skynews',

5364

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5365

'upload_date': r're:\d{8}',

5366

'description': str,

5367

'categories': ['News & Politics'],

5368

'tags': list,

5369

'like_count': int,

5370

'release_timestamp': 1642502819,

5371

'channel': 'Sky News',

5372

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5373

'age_limit': 0,

5374

'view_count': int,

5375

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

5376

'playable_in_embed': True,

5377

'release_date': '20220118',

5378

'availability': 'public',

5379

'live_status': 'is_live',

5380

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5381

'channel_follower_count': int

5382

},

5383

'params': {

5384

'skip_download': True,

5385

},

5386

'expected_warnings': ['Ignoring subtitle tracks found in '],

5387

}, {

5388

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5393

'uploader': 'The Young Turks',

5394

'uploader_id': 'TheYoungTurks',

5395

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5396

'upload_date': '20150715',

5397

'license': 'Standard YouTube License',

5398

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5399

'categories': ['News & Politics'],

5400

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5405

},

5406

'only_matching': True,

5407

}, {

5408

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5409

'only_matching': True,

5410

}, {

5411

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5412

'only_matching': True,

5413

}, {

5414

'note': 'A channel that is not live. Should raise error',

5415

'url': 'https://www.youtube.com/user/numberphile/live',

5416

'only_matching': True,

5417

}, {

5418

'url': 'https://www.youtube.com/feed/trending',

5419

'only_matching': True,

5420

}, {

5421

'url': 'https://www.youtube.com/feed/library',

5422

'only_matching': True,

5423

}, {

5424

'url': 'https://www.youtube.com/feed/history',

5425

'only_matching': True,

5426

}, {

5427

'url': 'https://www.youtube.com/feed/subscriptions',

5428

'only_matching': True,

5429

}, {

5430

'url': 'https://www.youtube.com/feed/watch_later',

5431

'only_matching': True,

5432

}, {

5433

'note': 'Recommended - redirects to home page.',

5434

'url': 'https://www.youtube.com/feed/recommended',

5435

'only_matching': True,

5436

}, {

5437

'note': 'inline playlist with not always working continuations',

5438

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5439

'only_matching': True,

5440

}, {

5441

'url': 'https://www.youtube.com/course',

5442

'only_matching': True,

5443

}, {

5444

'url': 'https://www.youtube.com/zsecurity',

5445

'only_matching': True,

5446

}, {

5447

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5448

'only_matching': True,

5449

}, {

5450

'url': 'https://www.youtube.com/TheYoungTurks/live',

5451

'only_matching': True,

5452

}, {

5453

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5460

}, {

5461

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5462

'only_matching': True,

5463

}, {

5464

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5465

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5466

'only_matching': True

5467

}, {

5468

'note': '/browse/ should redirect to /channel/',

5469

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5470

'only_matching': True

5471

}, {

5472

'note': 'VLPL, should redirect to playlist?list=PL...',

5473

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5474

'info_dict': {

5475

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5476

'uploader': 'NoCopyrightSounds',

5477

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5478

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5479

'title': 'NCS : All Releases 💿',

5480

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5481

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5482

'modified_date': r're:\d{8}',

5483

'view_count': int,

5484

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5485

'tags': [],

5486

'channel': 'NoCopyrightSounds',

5487

'availability': 'public',

5488

},

5489

'playlist_mincount': 166,

5490

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5491

}, {

5492

'note': 'Topic, should redirect to playlist?list=UU...',

5493

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5494

'info_dict': {

5495

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5496

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5497

'title': 'Uploads from Royalty Free Music - Topic',

5498

'uploader': 'Royalty Free Music - Topic',

5499

'tags': [],

5500

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5501

'channel': 'Royalty Free Music - Topic',

5502

'view_count': int,

5503

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5504

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5505

'modified_date': r're:\d{8}',

5506

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5507

'description': '',

5508

'availability': 'public',

5509

},

5510

'expected_warnings': [

5511

'The URL does not have a videos tab',

5512

r'[Uu]navailable videos (are|will be) hidden',

5513

],

5514

'playlist_mincount': 101,

5515

}, {

5516

'note': 'Topic without a UU playlist',

5517

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5518

'info_dict': {

5519

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5520

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5521

'tags': [],

5522

},

5523

'expected_warnings': [

5524

'the playlist redirect gave error',

5525

],

5526

'playlist_mincount': 9,

5527

}, {

5528

'note': 'Youtube music Album',

5529

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5530

'info_dict': {

5531

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5532

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5537

'modified_date': r're:\d{8}',

5538

},

5539

'playlist_count': 50,

5540

}, {

5541

'note': 'unlisted single video playlist',

5542

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5543

'info_dict': {

5544

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5545

'uploader': 'colethedj',

5546

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5547

'title': 'yt-dlp unlisted playlist test',

5548

'availability': 'unlisted',

5549

'tags': [],

5550

'modified_date': '20220418',

5551

'channel': 'colethedj',

5552

'view_count': int,

5553

'description': '',

5554

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5555

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5556

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5561

'url': 'https://www.youtube.com/feed/recommended',

5562

'info_dict': {

5563

'id': 'recommended',

5564

'title': 'recommended',

5565

'tags': [],

5566

},

5567

'playlist_mincount': 50,

5568

'params': {

5569

'skip_download': True,

5570

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5571

},

5572

}, {

5573

'note': 'API Fallback: /videos tab, sorted by oldest first',

5574

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5575

'info_dict': {

5576

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5577

'title': 'Cody\'sLab - Videos',

5578

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5579

'uploader': 'Cody\'sLab',

5580

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5581

'channel': 'Cody\'sLab',

5582

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5583

'tags': [],

5584

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5585

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5586

'channel_follower_count': int

5587

},

5588

'playlist_mincount': 650,

5589

'params': {

5590

'skip_download': True,

5591

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5592

},

5593

}, {

5594

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5595

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5596

'info_dict': {

5597

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5598

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5599

'title': 'Uploads from Royalty Free Music - Topic',

5600

'uploader': 'Royalty Free Music - Topic',

5601

'modified_date': r're:\d{8}',

5602

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5603

'description': '',

5604

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5605

'tags': [],

5606

'channel': 'Royalty Free Music - Topic',

5607

'view_count': int,

5608

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5609

'availability': 'public',

5610

},

5611

'expected_warnings': [

5612

'does not have a videos tab',

5613

r'[Uu]navailable videos (are|will be) hidden',

5614

],

5615

'playlist_mincount': 101,

5616

'params': {

5617

'skip_download': True,

5618

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5619

},

5620

}, {

5621

'note': 'non-standard redirect to regional channel',

5622

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5623

'only_matching': True

5624

}, {

5625

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5626

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5627

'info_dict': {

5628

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5629

'modified_date': '20220407',

5630

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5631

'tags': [],

5632

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5633

'uploader': 'pukkandan',

5634

'availability': 'unlisted',

5635

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5636

'channel': 'pukkandan',

5637

'description': 'Test for collaborative playlist',

5638

'title': 'yt-dlp test - collaborative playlist',

5639

'view_count': int,

5640

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5641

},

5642

'playlist_mincount': 2

5643

}, {

5644

'note': 'translated tab name',

5645

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',

5646

'info_dict': {

5647

'id': 'UCiu-3thuViMebBjw_5nWYrA',

5648

'tags': [],

5649

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5650

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5651

'description': '',

5652

'title': 'cole-dlp-test-acc - 再生リスト',

5653

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5654

'uploader': 'cole-dlp-test-acc',

5655

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5656

'channel': 'cole-dlp-test-acc',

5657

},

5658

'playlist_mincount': 1,

5659

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5660

'expected_warnings': ['Preferring "ja"'],

5661

}, {

5662

# XXX: this should really check flat playlist entries, but the test suite doesn't support that

5663

'note': 'preferred lang set with playlist with translated video titles',

5664

'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5665

'info_dict': {

5666

'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5667

'tags': [],

5668

'view_count': int,

5669

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5670

'uploader': 'cole-dlp-test-acc',

5671

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5672

'channel': 'cole-dlp-test-acc',

5673

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5674

'description': 'test',

5675

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5676

'title': 'dlp test playlist',

5677

'availability': 'public',

5678

},

5679

'playlist_mincount': 1,

5680

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5681

'expected_warnings': ['Preferring "ja"'],

5682

}, {

5683

# shorts audio pivot for 2GtVksBMYFM.

5684

'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',

5685

'info_dict': {

5686

'id': 'sfv_audio_pivot',

5687

'title': 'sfv_audio_pivot',

5688

'tags': [],

5689

},

5690

'playlist_mincount': 50,

}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    Anything YoutubeIE already accepts is rejected here; every other URL
    is judged by the inherited `suitable` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5697

5698

# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional "/<word>" path segment; the conditional group only tries to
#          match it when the `not_channel` group (defined in _VALID_URL) did NOT match
#   post - everything that remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5699

5700

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a channel/tab/playlist style URL into an extraction result.

    The URL is normalised to www.youtube.com, split with `_URL_RE`, optionally
    rewritten (music URLs, channel home pages, `watch?list=` without a video id)
    and then downloaded. Depending on what the response contains the result is
    built from tabs, from a watch-page playlist, or delegated to YoutubeIE for a
    single video.

    Raises:
        ExtractorError: when an album cannot be resolved, a requested tab does
            not exist, or the page cannot be recognised at all.
        UserNotLive: when a `/live` tab was requested but a tab page was served.
    """
    item_id = self._match_id(url)
    # Force the canonical host, whatever host the URL was given with
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    follow_redirects = 'no-youtube-channel-redirect' not in compat_opts
    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')

    def split_url(target):
        # Named groups of _URL_RE, with unmatched groups normalised to ''
        return {
            name: '' if value is None else value
            for name, value in self._URL_RE.match(target).groupdict().items()}

    mobj = split_url(url)
    redirect_warning = None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post = mobj['pre'], mobj['tab'].lower(), mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab, post, is_channel = '', '', False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            album_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if not album_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(album_url, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    original_tab_name = tab
    if is_channel and not tab and follow_redirects:
        # Home URLs should redirect to /videos/
        redirect_warning = (
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = f'{pre}{tab}{post}'
    mobj = split_url(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data,
        ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        get_all=False)
    if redirect_url and follow_redirects:
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_url = urljoin(
            url, traverse_obj(selected_tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
        translated_tab_name = selected_tab.get('title', '').lower()

        # Prefer tab name from tab url as it is always in en,
        # but only when preferred lang is set as it may not extract reliably in all cases.
        selected_tab_name = translated_tab_name
        keep_translated = self._preferred_lang in (None, 'en') and translated_tab_name
        if not keep_translated and selected_tab_url:
            url_tab_name = split_url(selected_tab_url)['tab'][1:]
            if url_tab_name:
                selected_tab_name = url_tab_name

        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]

        if follow_redirects:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab was explicitly part of the URL: do not silently substitute
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(
                            pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return self.url_result(
        f'https://www.youtube.com/watch?v={video_id}',
        ie=YoutubeIE.ie_key(), video_id=video_id)

5838

5839

5840

class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids and youtube/invidious URLs carrying a `list=`
    # parameter, and hands them over to YoutubeTabIE as a /playlist URL.
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether *url* should be handled by this extractor.

        URLs that YoutubeTabIE accepts, and URLs whose query string carries a
        non-empty `v` parameter, are rejected; everything else is decided by
        the inherited `suitable` check.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # `parse_qs` is the module-level import from ..utils; no local re-import needed
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* to a www.youtube.com/playlist URL and delegate to YoutubeTabIE.

        The original query string is carried over when there is one; otherwise
        only `list=<playlist id>` is used. Music URLs are flagged through
        smuggled data.
        """
        playlist_id = self._match_id(url)
        # Must be decided before `url` is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5951

5952

5953

class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that carry both a video id and a `list=` playlist id
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short link into a full watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6002

6003

6004

class YoutubeLivestreamEmbedIE(InfoExtractor):
    # /embed/live_stream?channel=<id> URLs, mapped onto the channel's /live page
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's `/live` URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

6017

6018

6019

class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shorthand, mapped onto the user's /videos page
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the user's `/videos` URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

6033

6034

6035

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" keyword (and its spelling variants), mapped onto the "LL" playlist
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the `list=LL` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

6052

6053

6054

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    # ":ytnotif" keyword: pages through the notification menu endpoint and
    # emits one url-type entry per notification that can be resolved.
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield the entries of one menu page.

        `continuation_list` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last continuation renderer
        encountered among the items, if any.
        """
        # The first path is the initial popup, the second a continuation response
        items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for menu_item in items:
            result = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if result:
                yield result
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Build a url-type result for one notification renderer.

        Returns None when the notification has no video id and no channel
        id / post id pair can be found either.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            channel_id = None
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The relative "sent" time is only parsed when approximate dates were requested
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from successive menu pages until no continuation is left."""
        continuation_list = [None]
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            # Visitor data is taken from the previous response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return a playlist named 'notifications' backed by the lazy entry generator."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)

6143

6144

6145

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearch" keyword; all behaviour comes from the two base classes
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearchdate" keyword; same as the plain search but with date-sorted params
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # /results and /search URLs; the query comes from `search_query` or `q`,
    # and an optional `sp` parameter is forwarded to the search.
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results for the query embedded in *url* as a playlist.

        Raises:
            ExtractorError: when neither `search_query` nor `q` has a value in
                the parsed query string.
        """
        qs = parse_qs(url)
        queries = qs.get('search_query') or qs.get('q')
        if not queries:
            # _VALID_URL lets the value start with '#' (e.g. "?q=#x"), in which case
            # the query string holds no value at all; fail cleanly, not with a TypeError
            raise ExtractorError('Unable to find a search query in the URL', expected=True)
        query = queries[0]
        return self.playlist_result(self._search_results(query, qs.get('sp', (None,))[0]), query, query)

6214

6215

6216

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles music.youtube.com/search URLs. A result section can be chosen either
    # through the `sp` query parameter or through the URL fragment (e.g. #songs).
    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music',
            'playlist_count': 16,
            'info_dict': {
                'id': 'royalty free music',
                'title': 'royalty free music',
            },
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - songs',
                'title': 'royalty free music - songs',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - community playlists',
                'title': 'royalty free music - community playlists',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
    ]

    # Section name (lower case, as written in the URL fragment) -> `sp` search params
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """
        Build a playlist result for a YouTube Music search URL.

        When `sp` is given it is used as the search params, and the section
        label is the matching _SECTIONS name (or the raw `sp` value if none
        matches). Otherwise the section is taken from the URL fragment and
        mapped to params through _SECTIONS; an unknown or missing fragment
        yields no section and no params. The playlist id and title are the
        query, followed by ' - <section>' when a section is known.
        """
        url_query = parse_qs(url)
        query = (url_query.get('search_query') or url_query.get('q'))[0]
        params = url_query.get('sp', (None,))[0]

        if params:
            # Prefer the human readable section name over the raw params string
            section = params
            for name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = name
                    break
        else:
            # Only the text between the first and second '#' is considered
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

6267

6268

6269

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    Subclasses must re-define the _FEED_NAME property.
    """
    _FEED_NAME = 'feeds'
    # NOTE(review): presumably consulted by _check_login_required - confirm there
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        # The extractor name is derived from the feed name of the concrete class
        return f'youtube:{cls._FEED_NAME}'

    def _real_initialize(self):
        # This class does not derive from YoutubeBaseInfoExtractor, so its
        # login check is invoked explicitly with this instance.
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the feed page URL for _FEED_NAME."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

6287

6288

6289

class YoutubeWatchLaterIE(InfoExtractor):
    # Maps the ":ytwatchlater" keyword onto the "WL" playlist URL.
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the watch later playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

6301

6302

6303

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home page as well as the ":ytrec" and
    # ":ytrecommended" keywords.
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Keyword forms accepted by the pattern include ":ytsubs" and ":ytsubscriptions".
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Accepts both the ":ythis" and ":ythistory" keywords.
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]

class YoutubeStoriesIE(InfoExtractor):
    # Accepts "ytstories:UC..." channel ids; the part after "UC" is captured as the id.
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {
            'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """
        Delegate to YoutubeTabIE with the "RLTD<id>" playlist URL.

        The playlist URL is smuggled with {'is_story': True} so that the
        receiving extractor can tell the request came from here.
        """
        playlist_id = f'RLTD{self._match_id(url)}'
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        story_url = smuggle_url(playlist_url, {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)

6357

6358

6359

class YoutubeShortsAudioPivotIE(InfoExtractor):
    # Handles youtube.com/source/<video id>/shorts URLs.
    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Generates sfv_audio_pivot browse params for this video id

        The encoded id is inserted three times into a fixed binary template;
        the result is base64 encoded and then URL-quoted.
        """
        encoded_id = video_id.encode()
        # NOTE(review): the \x0b bytes before each id look like length prefixes
        # for an 11-character id - confirm before accepting ids of other lengths
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (encoded_id, encoded_id, encoded_id)
        b64_params = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(b64_params)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the sfv_audio_pivot feed URL for the video."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)

class YoutubeTruncatedURLIE(InfoExtractor):
    # Matches watch/attribution_link URLs that end right after a single query
    # parameter (or after "watch?" / "watch"), i.e. without any video id, and
    # answers them with a hint about shell quoting.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_url'
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely quoting mistake."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    # Handles youtube.com/clip/<clip id> URLs by resolving the clip to its base
    # video plus the start/end offsets of the clipped section.
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
        },
    }]

    def _real_extract(self, url):
        """
        Resolve a clip URL to a url_transparent result for the base video.

        The returned dict points at the watch URL of the base video (handled
        by YoutubeIE), uses the clip id as `id`, and carries `section_start` /
        `section_end` in seconds, converted from the millisecond values of the
        clip's loop command.

        Raises ExtractorError when the base video id or the clip's start/end
        times cannot be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        # The traversal yields None when no loop command is present; fail with a
        # readable extractor error instead of a TypeError on subscripting None.
        if not isinstance(clip_data, dict):
            raise ExtractorError('Unable to find clip data')

        start_ms = int_or_none(clip_data.get('startTimeMs'))
        end_ms = int_or_none(clip_data.get('endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to determine clip start/end time')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }

class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose `v` value is only 1-10 characters long and
    # reports them as truncated.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id and the URL."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)