jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import enum
	6	import hashlib
	7	import itertools
	8	import json
	9	import math
	10	import os.path
	11	import random
	12	import re
	13	import sys
	14	import threading
	15	import time
	16	import traceback
	17	import urllib.error
	18	import urllib.parse
	19
	20	from .common import InfoExtractor, SearchInfoExtractor
	21	from .openload import PhantomJSwrapper
	22	from ..compat import functools
	23	from ..jsinterp import JSInterpreter
	24	from ..utils import (
	25	NO_DEFAULT,
	26	ExtractorError,
	27	LazyList,
	28	UserNotLive,
	29	bug_reports_message,
	30	classproperty,
	31	clean_html,
	32	datetime_from_str,
	33	dict_get,
	34	filter_dict,
	35	float_or_none,
	36	format_field,
	37	get_first,
	38	int_or_none,
	39	is_html,
	40	join_nonempty,
	41	js_to_json,
	42	mimetype2ext,
	43	network_exceptions,
	44	orderedSet,
	45	parse_codecs,
	46	parse_count,
	47	parse_duration,
	48	parse_iso8601,
	49	parse_qs,
	50	qualities,
	51	remove_start,
	52	smuggle_url,
	53	str_or_none,
	54	str_to_int,
	55	strftime_or_none,
	56	traverse_obj,
	57	try_get,
	58	unescapeHTML,
	59	unified_strdate,
	60	unified_timestamp,
	61	unsmuggle_url,
	62	update_url_query,
	63	url_or_none,
	64	urljoin,
	65	variadic,
	66	)
	67
	68	# any clients starting with _ cannot be explicitly requested by the user
	69	INNERTUBE_CLIENTS = {
	70	'web': {
	71	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	72	'INNERTUBE_CONTEXT': {
	73	'client': {
	74	'clientName': 'WEB',
	75	'clientVersion': '2.20220801.00.00',
	76	}
	77	},
	78	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	79	},
	80	'web_embedded': {
	81	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	82	'INNERTUBE_CONTEXT': {
	83	'client': {
	84	'clientName': 'WEB_EMBEDDED_PLAYER',
	85	'clientVersion': '1.20220731.00.00',
	86	},
	87	},
	88	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	89	},
	90	'web_music': {
	91	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	92	'INNERTUBE_HOST': 'music.youtube.com',
	93	'INNERTUBE_CONTEXT': {
	94	'client': {
	95	'clientName': 'WEB_REMIX',
	96	'clientVersion': '1.20220727.01.00',
	97	}
	98	},
	99	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	100	},
	101	'web_creator': {
	102	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	103	'INNERTUBE_CONTEXT': {
	104	'client': {
	105	'clientName': 'WEB_CREATOR',
	106	'clientVersion': '1.20220726.00.00',
	107	}
	108	},
	109	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	110	},
	111	'android': {
	112	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	113	'INNERTUBE_CONTEXT': {
	114	'client': {
	115	'clientName': 'ANDROID',
	116	'clientVersion': '17.31.35',
	117	'androidSdkVersion': 30,
	118	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	119	}
	120	},
	121	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	122	'REQUIRE_JS_PLAYER': False
	123	},
	124	'android_embedded': {
	125	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	126	'INNERTUBE_CONTEXT': {
	127	'client': {
	128	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	129	'clientVersion': '17.31.35',
	130	'androidSdkVersion': 30,
	131	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	132	},
	133	},
	134	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	135	'REQUIRE_JS_PLAYER': False
	136	},
	137	'android_music': {
	138	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	139	'INNERTUBE_CONTEXT': {
	140	'client': {
	141	'clientName': 'ANDROID_MUSIC',
	142	'clientVersion': '5.16.51',
	143	'androidSdkVersion': 30,
	144	'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
	145	}
	146	},
	147	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	148	'REQUIRE_JS_PLAYER': False
	149	},
	150	'android_creator': {
	151	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	152	'INNERTUBE_CONTEXT': {
	153	'client': {
	154	'clientName': 'ANDROID_CREATOR',
	155	'clientVersion': '22.30.100',
	156	'androidSdkVersion': 30,
	157	'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
	158	},
	159	},
	160	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	161	'REQUIRE_JS_PLAYER': False
	162	},
	163	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	164	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	165	'ios': {
	166	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	167	'INNERTUBE_CONTEXT': {
	168	'client': {
	169	'clientName': 'IOS',
	170	'clientVersion': '17.33.2',
	171	'deviceModel': 'iPhone14,3',
	172	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	173	}
	174	},
	175	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	176	'REQUIRE_JS_PLAYER': False
	177	},
	178	'ios_embedded': {
	179	'INNERTUBE_CONTEXT': {
	180	'client': {
	181	'clientName': 'IOS_MESSAGES_EXTENSION',
	182	'clientVersion': '17.33.2',
	183	'deviceModel': 'iPhone14,3',
	184	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	185	},
	186	},
	187	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	188	'REQUIRE_JS_PLAYER': False
	189	},
	190	'ios_music': {
	191	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	192	'INNERTUBE_CONTEXT': {
	193	'client': {
	194	'clientName': 'IOS_MUSIC',
	195	'clientVersion': '5.21',
	196	'deviceModel': 'iPhone14,3',
	197	'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	198	},
	199	},
	200	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	201	'REQUIRE_JS_PLAYER': False
	202	},
	203	'ios_creator': {
	204	'INNERTUBE_CONTEXT': {
	205	'client': {
	206	'clientName': 'IOS_CREATOR',
	207	'clientVersion': '22.33.101',
	208	'deviceModel': 'iPhone14,3',
	209	'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	210	},
	211	},
	212	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	213	'REQUIRE_JS_PLAYER': False
	214	},
	215	# mweb has 'ultralow' formats
	216	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	217	'mweb': {
	218	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	219	'INNERTUBE_CONTEXT': {
	220	'client': {
	221	'clientName': 'MWEB',
	222	'clientVersion': '2.20220801.00.00',
	223	}
	224	},
	225	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	226	},
	227	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	228	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	229	'tv_embedded': {
	230	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	231	'INNERTUBE_CONTEXT': {
	232	'client': {
	233	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	234	'clientVersion': '2.0',
	235	},
	236	},
	237	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	238	},
	239	}
	240
	241
	242	def _split_innertube_client(client_name):
	243	variant, *base = client_name.rsplit('.', 1)
	244	if base:
	245	return variant, base[0], variant
	246	base, *variant = client_name.split('_', 1)
	247	return client_name, base, variant[0] if variant else None
	248
	249
	250	def build_innertube_clients():
	251	THIRD_PARTY = {
	252	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	253	}
	254	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	255	priority = qualities(BASE_CLIENTS[::-1])
	256
	257	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	258	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	259	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	260	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	261	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	262
	263	_, base_client, variant = _split_innertube_client(client)
	264	ytcfg['priority'] = 10 * priority(base_client)
	265
	266	if not variant:
	267	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	268	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	269	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	270	embedscreen['priority'] -= 3
	271	elif variant == 'embedded':
	272	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	273	ytcfg['priority'] -= 2
	274	else:
	275	ytcfg['priority'] -= 3
	276
	277
	278	build_innertube_clients()
	279
	280
	281	class BadgeType(enum.Enum):
	282	AVAILABILITY_UNLISTED = enum.auto()
	283	AVAILABILITY_PRIVATE = enum.auto()
	284	AVAILABILITY_PUBLIC = enum.auto()
	285	AVAILABILITY_PREMIUM = enum.auto()
	286	AVAILABILITY_SUBSCRIPTION = enum.auto()
	287	LIVE_NOW = enum.auto()
	288
	289
	290	class YoutubeBaseInfoExtractor(InfoExtractor):
	291	"""Provide base functions for Youtube extractors"""
	292
	293	_RESERVED_NAMES = (
	294	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	295	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	296	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|source\|'
	297	r'storefront\|oops\|index\|account\|t/terms\|about\|upload\|signin\|logout')
	298
	299	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	300
	301	# _NETRC_MACHINE = 'youtube'
	302
	303	# If True it will raise an error if no login info is provided
	304	_LOGIN_REQUIRED = False
	305
	306	_INVIDIOUS_SITES = (
	307	# invidious-redirect websites
	308	r'(?:www\.)?redirect\.invidious\.io',
	309	r'(?:(?:www\|dev)\.)?invidio\.us',
	310	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	311	r'(?:www\.)?invidious\.pussthecat\.org',
	312	r'(?:www\.)?invidious\.zee\.li',
	313	r'(?:www\.)?invidious\.ethibox\.fr',
	314	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	315	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	316	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	317	# youtube-dl invidious instances list
	318	r'(?:(?:www\|no)\.)?invidiou\.sh',
	319	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	320	r'(?:www\.)?invidious\.kabi\.tk',
	321	r'(?:www\.)?invidious\.mastodon\.host',
	322	r'(?:www\.)?invidious\.zapashcanon\.fr',
	323	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	324	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	325	r'(?:www\.)?invidious\.himiko\.cloud',
	326	r'(?:www\.)?invidious\.reallyancient\.tech',
	327	r'(?:www\.)?invidious\.tube',
	328	r'(?:www\.)?invidiou\.site',
	329	r'(?:www\.)?invidious\.site',
	330	r'(?:www\.)?invidious\.xyz',
	331	r'(?:www\.)?invidious\.nixnet\.xyz',
	332	r'(?:www\.)?invidious\.048596\.xyz',
	333	r'(?:www\.)?invidious\.drycat\.fr',
	334	r'(?:www\.)?inv\.skyn3t\.in',
	335	r'(?:www\.)?tube\.poal\.co',
	336	r'(?:www\.)?tube\.connect\.cafe',
	337	r'(?:www\.)?vid\.wxzm\.sx',
	338	r'(?:www\.)?vid\.mint\.lgbt',
	339	r'(?:www\.)?vid\.puffyan\.us',
	340	r'(?:www\.)?yewtu\.be',
	341	r'(?:www\.)?yt\.elukerio\.org',
	342	r'(?:www\.)?yt\.lelux\.fi',
	343	r'(?:www\.)?invidious\.ggc-project\.de',
	344	r'(?:www\.)?yt\.maisputain\.ovh',
	345	r'(?:www\.)?ytprivate\.com',
	346	r'(?:www\.)?invidious\.13ad\.de',
	347	r'(?:www\.)?invidious\.toot\.koeln',
	348	r'(?:www\.)?invidious\.fdn\.fr',
	349	r'(?:www\.)?watch\.nettohikari\.com',
	350	r'(?:www\.)?invidious\.namazso\.eu',
	351	r'(?:www\.)?invidious\.silkky\.cloud',
	352	r'(?:www\.)?invidious\.exonip\.de',
	353	r'(?:www\.)?invidious\.riverside\.rocks',
	354	r'(?:www\.)?invidious\.blamefran\.net',
	355	r'(?:www\.)?invidious\.moomoo\.de',
	356	r'(?:www\.)?ytb\.trom\.tf',
	357	r'(?:www\.)?yt\.cyberhost\.uk',
	358	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	359	r'(?:www\.)?qklhadlycap4cnod\.onion',
	360	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	361	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	362	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	363	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	364	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	365	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	366	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	367	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	368	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	369	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	370	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	371	r'(?:www\.)?piped\.kavin\.rocks',
	372	r'(?:www\.)?piped\.silkky\.cloud',
	373	r'(?:www\.)?piped\.tokhmi\.xyz',
	374	r'(?:www\.)?piped\.moomoo\.me',
	375	r'(?:www\.)?il\.ax',
	376	r'(?:www\.)?piped\.syncpundit\.com',
	377	r'(?:www\.)?piped\.mha\.fi',
	378	r'(?:www\.)?piped\.mint\.lgbt',
	379	r'(?:www\.)?piped\.privacy\.com\.de',
	380	)
	381
	382	# extracted from account/account_menu ep
	383	# XXX: These are the supported YouTube UI and API languages,
	384	# which is slightly different from languages supported for translation in YouTube studio
	385	_SUPPORTED_LANG_CODES = [
	386	'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
	387	'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
	388	'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
	389	'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
	390	'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
	391	'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
	392	]
	393
	394	_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
	395
	396	@functools.cached_property
	397	def _preferred_lang(self):
	398	"""
	399	Returns a language code supported by YouTube for the user preferred language.
	400	Returns None if no preferred language set.
	401	"""
	402	preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
	403	if not preferred_lang:
	404	return
	405	if preferred_lang not in self._SUPPORTED_LANG_CODES:
	406	raise ExtractorError(
	407	f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
	408	expected=True)
	409	elif preferred_lang != 'en':
	410	self.report_warning(
	411	f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
	412	return preferred_lang
	413
	414	def _initialize_consent(self):
	415	cookies = self._get_cookies('https://www.youtube.com/')
	416	if cookies.get('__Secure-3PSID'):
	417	return
	418	consent_id = None
	419	consent = cookies.get('CONSENT')
	420	if consent:
	421	if 'YES' in consent.value:
	422	return
	423	consent_id = self._search_regex(
	424	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	425	if not consent_id:
	426	consent_id = random.randint(100, 999)
	427	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	428
	429	def _initialize_pref(self):
	430	cookies = self._get_cookies('https://www.youtube.com/')
	431	pref_cookie = cookies.get('PREF')
	432	pref = {}
	433	if pref_cookie:
	434	try:
	435	pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
	436	except ValueError:
	437	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	438	pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
	439	self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
	440
	441	def _real_initialize(self):
	442	self._initialize_pref()
	443	self._initialize_consent()
	444	self._check_login_required()
	445
	446	def _check_login_required(self):
	447	if self._LOGIN_REQUIRED and not self._cookies_passed:
	448	self.raise_login_required('Login details are needed to download this content', method='cookies')
	449
	450	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	451	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	452
	453	def _get_default_ytcfg(self, client='web'):
	454	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	455
	456	def _get_innertube_host(self, client='web'):
	457	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	458
	459	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	460	# try_get but with fallback to default ytcfg client values when present
	461	_func = lambda y: try_get(y, getter, expected_type)
	462	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	463
	464	def _extract_client_name(self, ytcfg, default_client='web'):
	465	return self._ytcfg_get_safe(
	466	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	467	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
	468
	469	def _extract_client_version(self, ytcfg, default_client='web'):
	470	return self._ytcfg_get_safe(
	471	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	472	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
	473
	474	def _select_api_hostname(self, req_api_hostname, default_client=None):
	475	return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
	476	or req_api_hostname or self._get_innertube_host(default_client or 'web'))
	477
	478	def _extract_api_key(self, ytcfg=None, default_client='web'):
	479	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
	480
	481	def _extract_context(self, ytcfg=None, default_client='web'):
	482	context = get_first(
	483	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	484	# Enforce language and tz for extraction
	485	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	486	client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	487	return context
	488
	489	_SAPISID = None
	490
	491	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	492	time_now = round(time.time())
	493	if self._SAPISID is None:
	494	yt_cookies = self._get_cookies('https://www.youtube.com')
	495	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	496	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	497	sapisid_cookie = dict_get(
	498	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	499	if sapisid_cookie and sapisid_cookie.value:
	500	self._SAPISID = sapisid_cookie.value

1

import base64

import calendar

import copy

import datetime

import enum

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

21

from .openload import PhantomJSwrapper

22

from ..compat import functools

23

from ..jsinterp import JSInterpreter

24

from ..utils import (

NO_DEFAULT,

ExtractorError,

LazyList,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

filter_dict,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user
#
# Maps a client identifier to its innertube configuration. Each entry carries
# the request context under 'INNERTUBE_CONTEXT' and a numeric
# 'INNERTUBE_CONTEXT_CLIENT_NAME'; 'INNERTUBE_API_KEY', 'INNERTUBE_HOST' and
# 'REQUIRE_JS_PLAYER' are optional here and receive defaults in
# build_innertube_clients().
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
    },
}

def _split_innertube_client(client_name):
    """Break an innertube client identifier into ``(name, base, variant)``.

    Two spellings are recognised:

    * ``'<variant>.<base>'`` - split on the LAST dot; the result is
      ``(variant, base, variant)``.
    * ``'<base>_<variant>'`` or a bare ``'<base>'`` - split on the FIRST
      underscore; the result is ``(client_name, base, variant)``, where
      ``variant`` is ``None`` if the name contains no underscore.
    """
    dotted = client_name.rsplit('.', 1)
    if len(dotted) == 2:
        prefix, base_name = dotted
        return prefix, base_name, prefix

    base_name, separator, suffix = client_name.partition('_')
    # An empty suffix after a trailing underscore stays '' rather than None
    return client_name, base_name, (suffix if separator else None)

248

249

250

def build_innertube_clients():
    """Complete every entry of ``INNERTUBE_CLIENTS`` in place.

    For each client present when the function is called:

    * missing ``INNERTUBE_API_KEY``, ``INNERTUBE_HOST`` and
      ``REQUIRE_JS_PLAYER`` keys, and a missing ``hl`` in the client context,
      are filled with defaults;
    * ``priority`` is set to ten times the rank that ``qualities()`` assigns
      to the client's base name, then lowered by 2 for ``embedded`` variants
      and by 3 for any other variant;
    * a client without a variant additionally gets a deep-copied
      ``<client>_embedscreen`` sibling with ``clientScreen`` set to ``EMBED``,
      the third-party context attached and its priority lowered by 3.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot, since *_embedscreen entries are added while looping
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, value in defaults:
            cfg.setdefault(key, value)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(name)
        cfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            # The base entry keeps its priority; only the derived copy is demoted
            screen_cfg = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg
            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_cfg['priority'] -= 3

276

277

278

build_innertube_clients()

279

280

281

class BadgeType(enum.Enum):
    """Symbolic badge categories.

    Member values come from ``enum.auto()``; only the member identity is
    meaningful.
    """

    # Availability badges
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Live status badge
    LIVE_NOW = enum.auto()

288

289

290

class YoutubeBaseInfoExtractor(InfoExtractor):

291

"""Provide base functions for Youtube extractors"""

292

293

_RESERVED_NAMES = (

294

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

300

301

# _NETRC_MACHINE = 'youtube'

302

303

# If True it will raise an error if no login info is provided

304

_LOGIN_REQUIRED = False

305

306

_INVIDIOUS_SITES = (

307

# invidious-redirect websites

308

r'(?:www\.)?redirect\.invidious\.io',

309

r'(?:(?:www|dev)\.)?invidio\.us',

310

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

311

r'(?:www\.)?invidious\.pussthecat\.org',

312

r'(?:www\.)?invidious\.zee\.li',

313

r'(?:www\.)?invidious\.ethibox\.fr',

314

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

315

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

316

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

317

# youtube-dl invidious instances list

318

r'(?:(?:www|no)\.)?invidiou\.sh',

319

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

320

r'(?:www\.)?invidious\.kabi\.tk',

321

r'(?:www\.)?invidious\.mastodon\.host',

322

r'(?:www\.)?invidious\.zapashcanon\.fr',

323

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

324

r'(?:www\.)?invidious\.tinfoil-hat\.net',

325

r'(?:www\.)?invidious\.himiko\.cloud',

326

r'(?:www\.)?invidious\.reallyancient\.tech',

327

r'(?:www\.)?invidious\.tube',

328

r'(?:www\.)?invidiou\.site',

329

r'(?:www\.)?invidious\.site',

330

r'(?:www\.)?invidious\.xyz',

331

r'(?:www\.)?invidious\.nixnet\.xyz',

332

r'(?:www\.)?invidious\.048596\.xyz',

333

r'(?:www\.)?invidious\.drycat\.fr',

334

r'(?:www\.)?inv\.skyn3t\.in',

335

r'(?:www\.)?tube\.poal\.co',

336

r'(?:www\.)?tube\.connect\.cafe',

337

r'(?:www\.)?vid\.wxzm\.sx',

338

r'(?:www\.)?vid\.mint\.lgbt',

339

r'(?:www\.)?vid\.puffyan\.us',

340

r'(?:www\.)?yewtu\.be',

341

r'(?:www\.)?yt\.elukerio\.org',

342

r'(?:www\.)?yt\.lelux\.fi',

343

r'(?:www\.)?invidious\.ggc-project\.de',

344

r'(?:www\.)?yt\.maisputain\.ovh',

345

r'(?:www\.)?ytprivate\.com',

346

r'(?:www\.)?invidious\.13ad\.de',

347

r'(?:www\.)?invidious\.toot\.koeln',

348

r'(?:www\.)?invidious\.fdn\.fr',

349

r'(?:www\.)?watch\.nettohikari\.com',

350

r'(?:www\.)?invidious\.namazso\.eu',

351

r'(?:www\.)?invidious\.silkky\.cloud',

352

r'(?:www\.)?invidious\.exonip\.de',

353

r'(?:www\.)?invidious\.riverside\.rocks',

354

r'(?:www\.)?invidious\.blamefran\.net',

355

r'(?:www\.)?invidious\.moomoo\.de',

356

r'(?:www\.)?ytb\.trom\.tf',

357

r'(?:www\.)?yt\.cyberhost\.uk',

358

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

359

r'(?:www\.)?qklhadlycap4cnod\.onion',

360

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

361

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

362

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

363

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

364

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

365

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

366

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

367

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

368

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

369

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

370

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

371

r'(?:www\.)?piped\.kavin\.rocks',

372

r'(?:www\.)?piped\.silkky\.cloud',

373

r'(?:www\.)?piped\.tokhmi\.xyz',

374

r'(?:www\.)?piped\.moomoo\.me',

375

r'(?:www\.)?il\.ax',

376

r'(?:www\.)?piped\.syncpundit\.com',

377

r'(?:www\.)?piped\.mha\.fi',

378

r'(?:www\.)?piped\.mint\.lgbt',

379

r'(?:www\.)?piped\.privacy\.com\.de',

380

)

381

382

# extracted from account/account_menu ep

383

# XXX: These are the supported YouTube UI and API languages,

384

# which is slightly different from languages supported for translation in YouTube studio

385

_SUPPORTED_LANG_CODES = [

386

'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',

387

'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',

388

'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',

389

'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',

390

'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',

391

'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'

392

]

393

394

_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}

395

396

@functools.cached_property

397

def _preferred_lang(self):

398

"""

399

Returns a language code supported by YouTube for the user preferred language.

400

Returns None if no preferred language set.

401

"""

402

preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]

403

if not preferred_lang:

404

return

405

if preferred_lang not in self._SUPPORTED_LANG_CODES:

406

raise ExtractorError(

407

f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',

408

expected=True)

409

elif preferred_lang != 'en':

410

self.report_warning(

411

f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')

412

return preferred_lang

413

414

def _initialize_consent(self):

415

cookies = self._get_cookies('https://www.youtube.com/')

416

if cookies.get('__Secure-3PSID'):

417

return

418

consent_id = None

419

consent = cookies.get('CONSENT')

420

if consent:

421

if 'YES' in consent.value:

422

return

423

consent_id = self._search_regex(

424

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

425

if not consent_id:

426

consent_id = random.randint(100, 999)

427

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

428

429

def _initialize_pref(self):

430

cookies = self._get_cookies('https://www.youtube.com/')

431

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(urllib.parse.parse_qsl(pref_cookie.value))

436

except ValueError:

437

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

438

pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})

439

self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))

440

441

def _real_initialize(self):

442

self._initialize_pref()

443

self._initialize_consent()

444

self._check_login_required()

445

446

def _check_login_required(self):

447

if self._LOGIN_REQUIRED and not self._cookies_passed:

448

self.raise_login_required('Login details are needed to download this content', method='cookies')

449

450

# Matches the start of the `ytInitialData = ` / `window["ytInitialData"] = ` assignment;
# extract_yt_initial_data() uses it to locate the JSON that follows
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

451

# Matches the start of the `ytInitialPlayerResponse = ` assignment
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

452

453

def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the built-in config for `client`."""
    builtin_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_config)

455

456

def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' entry of the built-in config for `client`."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']

458

459

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get() on `ytcfg`, falling back to the default config of
    `default_client` when the given ytcfg yields no truthy value.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

463

464

def _extract_client_name(self, ytcfg, default_client='web'):

465

return self._ytcfg_get_safe(

466

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

467

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)

468

469

def _extract_client_version(self, ytcfg, default_client='web'):

470

return self._ytcfg_get_safe(

471

ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],

472

lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)

473

474

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname. Precedence: the `innertube_host` extractor
    argument, then `req_api_hostname`, then the host of `default_client`
    (or of 'web' when no client is given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

477

478

def _extract_api_key(self, ytcfg=None, default_client='web'):

479

return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)

480

481

def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict of `ytcfg` (or of the default config
    of `default_client`) with language and timezone forced.

    The 'client' sub-dict is updated in place when it exists.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context.update({
        'hl': self._preferred_lang or 'en',
        'timeZone': 'UTC',
        'utcOffsetMinutes': 0,
    })
    return context

# SAPISID value cached by _generate_sapisidhash_header():
# None = not looked up yet, False = looked up but no usable cookie found
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

492

time_now = round(time.time())

493

if self._SAPISID is None:

494

yt_cookies = self._get_cookies('https://www.youtube.com')

495

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

496

# See: https://github.com/yt-dlp/yt-dlp/issues/393

497

sapisid_cookie = dict_get(

498

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

499

if sapisid_cookie and sapisid_cookie.value:

500

self._SAPISID = sapisid_cookie.value

501

self.write_debug('Extracted SAPISID cookie')

502

# SAPISID cookie is required if not already present

503

if not yt_cookies.get('SAPISID'):

504

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

505

self._set_cookie(

506

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

507

else:

508

self._SAPISID = False

509

if not self._SAPISID:

510

return None

511

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

512

sapisidhash = hashlib.sha1(

513

f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()

514

return f'SAPISIDHASH {time_now}_{sapisidhash}'

515

516

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged with the client context) as JSON to the
    /youtubei/v1/<ep> endpoint and return the result of _download_json().

    The `innertube_key` extractor argument takes precedence over `api_key`,
    which takes precedence over the key of `default_client`.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    return self._download_json(
        f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})

533

534

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON assigned to ytInitialData and return it."""
    start_pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(start_pattern, webpage, 'yt initial data', item_id, fatal=fatal)

536

537

@staticmethod
def _extract_session_index(*data):
    """
    Return the first integer 'SESSION_INDEX' found in the given ytcfgs,
    i.e. the index of the current account in the account list.
    Returns None if none of them has one.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    """
    indices = (int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX'])) for ytcfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):

550

if ytcfg:

551

token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)

if token:

return token

if webpage:

return self._search_regex(

556

r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,

557

'identity token', default=None, fatal=False)

558

559

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for data in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(data, datasync_getters, str) or ''
        sync_ids = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(sync_ids) >= 2 and sync_ids[1]:
            return sync_ids[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Return the first visitorData string found in the given API responses or ytcfgs.
    Appears to be used to track session state
    """
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)

587

588

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

591

592

def extract_ytcfg(self, video_id, webpage):
    """
    Extract the JSON object passed to `ytcfg.set({...});` in `webpage`.

    Returns an empty dict when `webpage` is empty, when no such call is
    found, or when the captured text cannot be parsed.
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped literals; unescaped `$`
    # anchors in their place can never be followed by `{`, so nothing matched.
    ytcfg_json = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(ytcfg_json, video_id, fatal=False) or {}

599

600

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an API request.

    Explicitly passed values take precedence over the ones found in `ytcfg`;
    the result is passed through filter_dict() before it is returned.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    client_version = self._extract_client_version(ytcfg, default_client)
    id_token = identity_token or self._extract_identity_token(ytcfg)
    page_id = account_syncid or self._extract_account_syncid(ytcfg)
    visitor_id = visitor_data or self._extract_visitor_data(ytcfg)
    user_agent = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': client_version,
        'Origin': origin,
        'X-Youtube-Identity-Token': id_token,
        'X-Goog-PageId': page_id,
        'X-Goog-Visitor-Id': visitor_id,
        'User-Agent': user_agent,
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # An account sync id without a known session index uses index 0
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return filter_dict(headers)

625

626

def _download_ytcfg(self, client, video_id):

627

url = {

628

'web': 'https://www.youtube.com',

629

'web_music': 'https://music.youtube.com',

630

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(

635

url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')

636

return self.extract_ytcfg(video_id, webpage) or {}

637

638

@staticmethod

639

def _build_api_continuation_query(continuation, ctp=None):

640

query = {

641

'continuation': continuation

642

}

643

# TODO: Inconsistency with clickTrackingParams.

644

# Currently we have a fixed ctp contained within context (from ytcfg)

645

# and a ctp in root query for continuation.

646

if ctp:

647

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the nextContinuationData or
    reloadContinuationData of `renderer`. Returns None if there is none.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

662

663

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.
    Returns None when the argument is not a dict or carries no token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

672

673

@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`: first from its next/reload
    continuation data, otherwise from the first continuationItemRenderer
    endpoint that yields one.
    """
    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, endpoint_path, get_all=False, expected_type=cls._extract_continuation_ep_data)

683

684

@classmethod
def _extract_alerts(cls, data):
    """Yield (alert_type, message) for every alert in data['alerts'] that has both a type and a text."""
    alert_dicts = try_get(data, lambda x: x['alerts'], list) or []
    for alert_dict in alert_dicts:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            alert_type = alert.get('type')
            if alert_type:
                message = cls._get_text(alert, 'text')
                if message:
                    yield alert_type, message

696

697

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

698

errors, warnings = [], []

699

for alert_type, alert_message in alerts:

700

if alert_type.lower() == 'error' and fatal:

701

errors.append([alert_type, alert_message])

702

elif alert_message not in self._IGNORED_WARNINGS:

703

warnings.append([alert_type, alert_message])

704

705

for alert_type, alert_message in (warnings + errors[:-1]):

706

self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

707

if errors:

708

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

709

710

def _extract_and_report_alerts(self, data, *args, **kwargs):

711

return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

712

713

def _extract_badges(self, renderer: dict):
    """
    Collect the badges of `renderer` as a list of {'type': BadgeType} dicts.

    Each metadataBadgeRenderer is classified by its icon type, then by its
    style, and finally by matching known English words in its label.
    Badges that match none of these are skipped.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Use the type matched by the label (badge_type is always
                # falsy here) and stop at the first match so that one badge
                # never produces more than one entry.
                badges.append({'type': label_badge_type})
                break

    return badges

@staticmethod
def _has_badge(badges, badge_type):
    """Return True if any entry of `badges` has the given type."""
    matching = traverse_obj(badges, lambda _, v: v['type'] == badge_type)
    return bool(matching)

756

757

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found under any of the given paths of `data`.

    An item provides text either through its 'simpleText' string or through
    the concatenated 'text' of its 'runs' (at most `max_runs` of them).
    Without any path, `data` itself is the item. Returns None if nothing is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only branching paths already produce a list of results
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                # The item may itself be the list of runs
                runs = item

            run_limit = min(len(runs), max_runs or len(runs))
            joined_text = ''.join(
                traverse_obj(runs[:run_limit], (..., 'text'), expected_type=str, default=[]))
            if joined_text:
                return joined_text

def _get_count(self, data, *path_list):
    """
    Parse a count from the text found at the given paths of `data`.

    Falls back to the leading run of digits and commas (whitespace removed)
    when parse_count() cannot handle the text.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed

    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            thumb_url = url_or_none(entry.get('url'))
            if not thumb_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumb_url:
                thumb_url = thumb_url.split('?')[0]
            results.append({
                'url': thumb_url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text contains no relative time or when the
    matched time cannot be converted.
    """
    # The match must be computed before it is inspected; without this search
    # every call failed with a NameError on `mobj`. The pattern provides the
    # 'start', 'time' and 'unit' groups read below.
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if mobj:
        start = mobj.group('start')
        if start:
            return datetime_from_str(start)
        try:
            return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
        except ValueError:
            return None

def _parse_time_text(self, text):
    """
    Convert a time text (relative such as '6 days ago', or a date) to a timestamp.

    Returns None for empty text or when the text cannot be parsed; in the
    latter case a warning is issued if the preferred language is unset or 'en'.
    """
    if not text:
        return None

    relative_dt = self.extract_relative_time(text)
    if isinstance(relative_dt, datetime.datetime):
        timestamp = calendar.timegm(relative_dt.timetuple())
    else:
        # Not a relative time: parse the whole text as a date, then only the date-like part of it
        timestamp = (
            unified_timestamp(text) or unified_timestamp(
                self._search_regex(
                    (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                    text.lower(), 'time text', default=None)))

    if text and timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp

846

847

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep`, retrying through self.RetryManager().

    Retried: network errors other than HTTP 403/429, "unknown error" alerts,
    and responses that lack the `check_get_keys` data. Any other failure is
    passed to _error_or_warning() with the given `fatal`.
    """
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            cause = err.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(cause, urllib.error.HTTPError):
                retry.error = err
                continue

            first_bytes = cause.read(512)
            if not is_html(first_bytes):
                # A non-HTML error body may carry a message from YouTube
                error_body = self._webpage_read_content(cause, None, item_id, prefix=first_bytes) or '{}'
                yt_error = try_get(
                    self._parse_json(error_body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)

            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.code in (403, 429):
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' not in err.msg.lower():
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if traverse_obj(response, *variadic(check_get_keys)):
            return response
        retry.error = ExtractorError('Incomplete data received', expected=True)

@staticmethod

def is_music_url(url):

901

return re.match(r'https?://music\.youtube\.com/', url) is not None

902

903

def _extract_video(self, renderer):
    """
    Build a 'url' type result for YoutubeIE from a video renderer dict.

    Shorts get a /shorts/ URL. The view count is stored under
    'concurrent_view_count' for live and upcoming videos and under
    'view_count' otherwise.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration in the accessibility label of the title
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText', 'shortViewCountText')
    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    time_text = self._get_text(renderer, 'publishedTimeText') or ''
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # The upload date derived from the relative time text is only approximate,
    # so it is set only when the `approximate_date` argument is given
    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(self._parse_time_text(time_text), '%Y%m%d')

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    view_count_key = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_key: view_count,
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

967

# Human-readable description of this extractor (presumably surfaced in extractor listings — confirm against InfoExtractor).
IE_DESC = 'YouTube'

968

# Verbose regex accepting either a bare 11-character video ID or a URL on
# youtube.com (and the mirror/proxy hosts listed below) that carries one.
# The video ID is captured in the named group `id`.
# `%(invidious)s` is substituted with the '|'-joined entries of
# YoutubeBaseInfoExtractor._INVIDIOUS_SITES (presumably regex fragments for
# Invidious instance hostnames — confirm against that attribute).
_VALID_URL = r"""(?x)^
    (
        (?:https?://|//)                                        # http(s):// or protocol-independent URL
        (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
            (?:www\.)?deturl\.com/www\.youtube\.com|
            (?:www\.)?pwnyoutube\.com|
            (?:www\.)?hooktube\.com|
            (?:www\.)?yourepeat\.com|
            tube\.majestyc\.net|
            %(invidious)s|
            youtube\.googleapis\.com)/                          # the various hostnames, with wildcard subdomains
        (?:.*?\#/)?                                             # handle anchor (#/) redirect urls
        (?:                                                     # the various things that can precede the ID:
            (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream))  # v/ or embed/ or e/ or shorts/
            |(?:                                                # or the v= param in all its forms
                (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?     # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                (?:\?|\#!?)                                     # the params delimiter ? or # or #!
                (?:.*?[&;])??                                   # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
                v=
            )
        ))
        |(?:
            youtu\.be|                                          # just youtu.be/xxxx
            vid\.plus|                                          # or vid.plus/xxxx
            zwearz\.com/watch|                                  # or zwearz.com/watch/xxxx
            %(invidious)s
        )/
        |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
        )
    )?                                                          # all until now is optional -> you can pass the naked ID
    (?P<id>[0-9A-Za-z_-]{11})                                   # here is it! the YouTube video ID
    (?(1).+)?                                                   # if we found the ID, everything can follow
    (?:\#|$)""" % {
    'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}

# Regexes that locate YouTube embeds inside HTML/JS text.  Every pattern
# exposes the embedded player or watch URL through the named group `url`.
_EMBED_REGEX = [
    # Generic embeds: a quoted youtube(-nocookie).com/(embed|v|p)/<id> URL that
    # follows an <iframe>/<embed>/<object> attribute, a data-video-url
    # attribute, or a SWFObject call.
    # NOTE: the embedSWF alternative used to be `embedSWF\(?:\s*`, which
    # required a literal ':' and therefore never matched the ordinary
    # `embedSWF("url", ...)` call.  The colon is now optional, so the call form
    # matches while every string the old pattern accepted still does.
    r'''(?x)
    (?:
        <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
        data-video-url=|
        <embed[^>]+?src=|
        embedSWF\(?:?\s*|
        <object[^>]+data=|
        new\s+SWFObject\(
    )
    (["\'])
    (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
    (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
    \1''',
    # https://wordpress.org/plugins/lazy-load-for-videos/
    r'''(?xs)
    <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
    \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
]

# Regexes for player JavaScript URLs; each captures the player identifier in
# the named group `id`.  Three URL layouts are covered: `/s/player/<id>/player`,
# `/<id>/player…/base.js`, and file names containing a `vfl…` token.
# (Presumably tried in order against the player URL — confirm at the call site.)
_PLAYER_INFO_RE = (
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)

1028

# Static properties of known YouTube format codes (itags), keyed by the itag
# as a string.  Each value lists whatever is fixed for that itag: container
# extension, dimensions, codecs, audio bitrate, fps, a format note and, for
# some groups, a negative `preference`.
# (Presumably merged into extracted format dicts as defaults — confirm against
# the format-extraction code.)
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # Dash mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # Dash webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # Dash webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # Dash webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
    '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
    '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
    '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
    '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}

1137

# Subtitle format identifiers handled by this extractor
# (presumably the formats requested for each caption track — confirm against the subtitle-extraction code).
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# NOTE(review): looks like this switches off the base class's geo-bypass behaviour — confirm against InfoExtractor.
_GEO_BYPASS = False

# Short extractor name (presumably the identifier shown to users and used for lookups — confirm against InfoExtractor).
IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1149

'uploader': 'Philipp Hagemeister',

1150

'uploader_id': 'phihag',

1151

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1152

'channel': 'Philipp Hagemeister',

1153

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1154

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1155

'upload_date': '20121002',

1156

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1157

'categories': ['Science & Technology'],

1158

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1163

'playable_in_embed': True,

1164

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1165

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1170

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1175

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1180

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1181

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1182

'uploader': 'SET India',

1183

'uploader_id': 'setindia',

1184

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1185

'age_limit': 18,

1186

},

1187

'skip': 'Private video',

1188

},

1189

{

1190

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1191

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1196

'uploader': 'Philipp Hagemeister',

1197

'uploader_id': 'phihag',

1198

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1199

'channel': 'Philipp Hagemeister',

1200

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1201

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1202

'upload_date': '20121002',

1203

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1204

'categories': ['Science & Technology'],

1205

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1210

'playable_in_embed': True,

1211

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1212

'live_status': 'not_live',

1213

'age_limit': 0,

1214

'comment_count': int,

1215

'channel_follower_count': int

1216

},

1217

'params': {

1218

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1223

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1228

'uploader_id': '8KVIDEO',

1229

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1230

'description': '',

1231

'uploader': '8KVIDEO',

1232

'title': 'UHDTV TEST 8K VIDEO.mp4'

1233

},

1234

'params': {

1235

'youtube_include_dash_manifest': True,

1236

'format': '141',

1237

},

1238

'skip': 'format 141 not served anymore',

1239

},

1240

# DASH manifest with encrypted signature

1241

{

1242

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1247

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1248

'duration': 244,

1249

'uploader': 'AfrojackVEVO',

1250

'uploader_id': 'AfrojackVEVO',

1251

'upload_date': '20131011',

1252

'abr': 129.495,

1253

'like_count': int,

1254

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1255

'playable_in_embed': True,

1256

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1257

'view_count': int,

1258

'track': 'The Spark',

1259

'live_status': 'not_live',

1260

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1261

'channel': 'Afrojack',

1262

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1263

'tags': 'count:19',

1264

'availability': 'public',

1265

'categories': ['Music'],

1266

'age_limit': 0,

1267

'alt_title': 'The Spark',

1268

'channel_follower_count': int

1269

},

1270

'params': {

1271

'youtube_include_dash_manifest': True,

1272

'format': '141/bestaudio[ext=m4a]',

1273

},

1274

},

1275

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1276

{

1277

'note': 'Embed allowed age-gate video',

1278

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1283

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1284

'duration': 142,

1285

'uploader': 'The Witcher',

1286

'uploader_id': 'WitcherGame',

1287

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1288

'upload_date': '20140605',

1289

'age_limit': 18,

1290

'categories': ['Gaming'],

1291

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1292

'availability': 'needs_auth',

1293

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1294

'like_count': int,

1295

'channel': 'The Witcher',

1296

'live_status': 'not_live',

1297

'tags': 'count:17',

1298

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1299

'playable_in_embed': True,

1300

'view_count': int,

1301

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1306

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1311

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1312

'upload_date': '20200408',

1313

'uploader_id': 'FlyingKitty900',

1314

'uploader': 'FlyingKitty',

1315

'age_limit': 18,

1316

'availability': 'needs_auth',

1317

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1318

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1319

'channel': 'FlyingKitty',

1320

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1321

'view_count': int,

1322

'categories': ['Entertainment'],

1323

'live_status': 'not_live',

1324

'tags': ['Flyingkitty', 'godzilla 2'],

1325

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1326

'like_count': int,

1327

'duration': 177,

1328

'playable_in_embed': True,

1329

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1334

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1335

'info_dict': {

1336

'id': 'Tq92D6wQ1mg',

1337

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1338

'ext': 'mp4',

1339

'upload_date': '20191228',

1340

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1341

'uploader': 'Projekt Melody',

1342

'description': 'md5:17eccca93a786d51bc67646756894066',

1343

'age_limit': 18,

1344

'like_count': int,

1345

'availability': 'needs_auth',

1346

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1347

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1348

'view_count': int,

1349

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1350

'channel': 'Projekt Melody',

1351

'live_status': 'not_live',

1352

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1353

'playable_in_embed': True,

1354

'categories': ['Entertainment'],

1355

'duration': 106,

1356

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1357

'comment_count': int,

1358

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1363

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1368

'uploader': 'Herr Lurik',

1369

'uploader_id': 'st3in234',

1370

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1371

'upload_date': '20130730',

1372

'track': 'Such mich find mich',

1373

'age_limit': 0,

1374

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1375

'like_count': int,

1376

'playable_in_embed': False,

1377

'creator': 'OOMPH!',

1378

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1379

'view_count': int,

1380

'alt_title': 'Such mich find mich',

1381

'duration': 210,

1382

'channel': 'Herr Lurik',

1383

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1384

'categories': ['Music'],

1385

'availability': 'public',

1386

'uploader_url': 'http://www.youtube.com/user/st3in234',

1387

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1388

'live_status': 'not_live',

1389

'artist': 'OOMPH!',

1390

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1395

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1396

'only_matching': True,

1397

},

1398

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1399

# YouTube Red ad is not captured for creator

1400

{

1401

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1407

'uploader_id': 'deadmau5',

1408

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1409

'creator': 'deadmau5',

1410

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1411

'uploader': 'deadmau5',

1412

'title': 'Deadmau5 - Some Chords (HD)',

1413

'alt_title': 'Some Chords',

1414

'availability': 'public',

1415

'tags': 'count:14',

1416

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1417

'view_count': int,

1418

'live_status': 'not_live',

1419

'channel': 'deadmau5',

1420

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1421

'like_count': int,

1422

'track': 'Some Chords',

1423

'artist': 'deadmau5',

1424

'playable_in_embed': True,

1425

'age_limit': 0,

1426

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1427

'categories': ['Music'],

1428

'album': 'Some Chords',

1429

'channel_follower_count': int

1430

},

1431

'expected_warnings': [

1432

'DASH manifest missing',

1433

]

1434

},

1435

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1436

{

1437

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1443

'uploader_id': 'olympic',

1444

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1445

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1446

'uploader': 'Olympics',

1447

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1448

'like_count': int,

1449

'release_timestamp': 1343767800,

1450

'playable_in_embed': True,

1451

'categories': ['Sports'],

1452

'release_date': '20120731',

1453

'channel': 'Olympics',

1454

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1455

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1456

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1457

'age_limit': 0,

1458

'availability': 'public',

1459

'live_status': 'was_live',

1460

'view_count': int,

1461

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1462

'channel_follower_count': int

1463

},

1464

'params': {

1465

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1475

'duration': 85,

1476

'upload_date': '20110310',

1477

'uploader_id': 'AllenMeow',

1478

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1479

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1480

'uploader': '孫ᄋᄅ',

1481

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1482

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1487

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1488

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1489

'view_count': int,

1490

'categories': ['People & Blogs'],

1491

'like_count': int,

1492

'live_status': 'not_live',

1493

'availability': 'unlisted',

1494

'comment_count': int,

1495

'channel_follower_count': int

1496

},

1497

},

1498

# url_encoded_fmt_stream_map is empty string

1499

{

1500

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1505

'description': '',

1506

'upload_date': '20150404',

1507

'uploader_id': 'spbelect',

1508

'uploader': 'Наблюдатели Петербурга',

1509

},

1510

'params': {

1511

'skip_download': 'requires avconv',

1512

},

1513

'skip': 'This live event has ended.',

1514

},

1515

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1516

{

1517

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1522

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1523

'duration': 220,

1524

'upload_date': '20150625',

1525

'uploader_id': 'dorappi2000',

1526

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1527

'uploader': 'dorappi2000',

1528

'formats': 'mincount:31',

1529

},

1530

'skip': 'not actual anymore',

1531

},

1532

# DASH manifest with segment_list

1533

{

1534

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1535

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1540

'uploader': 'Airtek',

1541

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1542

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1543

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1544

},

1545

'params': {

1546

'youtube_include_dash_manifest': True,

1547

'format': '135', # bestvideo

1548

},

1549

'skip': 'This live event has ended.',

1550

},

1551

{

1552

# Multifeed videos (multiple cameras), URL is for Main Camera

1553

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1554

'info_dict': {

1555

'id': 'jvGDaLqkpTg',

1556

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1557

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1564

'description': 'md5:e03b909557865076822aa169218d6a5d',

1565

'duration': 10643,

1566

'upload_date': '20161111',

1567

'uploader': 'Team PGP',

1568

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1569

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1576

'description': 'md5:e03b909557865076822aa169218d6a5d',

1577

'duration': 10991,

1578

'upload_date': '20161111',

1579

'uploader': 'Team PGP',

1580

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1581

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1588

'description': 'md5:e03b909557865076822aa169218d6a5d',

1589

'duration': 10995,

1590

'upload_date': '20161111',

1591

'uploader': 'Team PGP',

1592

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1593

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1600

'description': 'md5:e03b909557865076822aa169218d6a5d',

1601

'duration': 10990,

1602

'upload_date': '20161111',

1603

'uploader': 'Team PGP',

1604

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1605

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1610

},

1611

'skip': 'Not multifeed anymore',

1612

},

1613

{

1614

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1615

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1616

'info_dict': {

1617

'id': 'gVfLd0zydlo',

1618

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1619

},

1620

'playlist_count': 2,

1621

'skip': 'Not multifeed anymore',

1622

},

1623

{

1624

'url': 'https://vid.plus/FlRa-iH7PGw',

1625

'only_matching': True,

1626

},

1627

{

1628

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1629

'only_matching': True,

1630

},

1631

{

1632

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1633

# Also tests cut-off URL expansion in video description (see

1634

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1635

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1636

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1641

'alt_title': 'Dark Walk',

1642

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1643

'duration': 133,

1644

'upload_date': '20151119',

1645

'uploader_id': 'IronSoulElf',

1646

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1647

'uploader': 'IronSoulElf',

1648

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1649

'track': 'Dark Walk',

1650

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1651

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1652

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1653

'categories': ['Film & Animation'],

1654

'view_count': int,

1655

'live_status': 'not_live',

1656

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1657

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1658

'tags': 'count:13',

1659

'availability': 'public',

1660

'channel': 'IronSoulElf',

1661

'playable_in_embed': True,

1662

'like_count': int,

1663

'age_limit': 0,

1664

'channel_follower_count': int

1665

},

1666

'params': {

1667

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1672

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1673

'only_matching': True,

1674

},

1675

{

1676

# Video with yt:stretch=17:0

1677

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1682

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1683

'upload_date': '20151107',

1684

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1685

'uploader': 'CH GAMER DROID',

1686

},

1687

'params': {

1688

'skip_download': True,

1689

},

1690

'skip': 'This video does not exist.',

1691

},

1692

{

1693

# Video with incomplete 'yt:stretch=16:'

1694

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1695

'only_matching': True,

1696

},

1697

{

1698

# Video licensed under Creative Commons

1699

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1704

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1705

'duration': 721,

1706

'upload_date': '20150128',

1707

'uploader_id': 'BerkmanCenter',

1708

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1709

'uploader': 'The Berkman Klein Center for Internet & Society',

1710

'license': 'Creative Commons Attribution license (reuse allowed)',

1711

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1712

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1713

'like_count': int,

1714

'age_limit': 0,

1715

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1716

'channel': 'The Berkman Klein Center for Internet & Society',

1717

'availability': 'public',

1718

'view_count': int,

1719

'categories': ['Education'],

1720

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1721

'live_status': 'not_live',

1722

'playable_in_embed': True,

1723

'comment_count': int,

1724

'channel_follower_count': int

1725

},

1726

'params': {

1727

'skip_download': True,

},

},

{

# Channel-like uploader_url

1732

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1737

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1738

'duration': 4060,

1739

'upload_date': '20151120',

1740

'uploader': 'Bernie Sanders',

1741

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1742

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1743

'license': 'Creative Commons Attribution license (reuse allowed)',

1744

'playable_in_embed': True,

1745

'tags': 'count:12',

1746

'like_count': int,

1747

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1748

'age_limit': 0,

1749

'availability': 'public',

1750

'categories': ['News & Politics'],

1751

'channel': 'Bernie Sanders',

1752

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1753

'view_count': int,

1754

'live_status': 'not_live',

1755

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1756

'comment_count': int,

1757

'channel_follower_count': int

1758

},

1759

'params': {

1760

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1765

'only_matching': True,

1766

},

1767

{

1768

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1769

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1770

'only_matching': True,

1771

},

1772

{

1773

# Rental video preview

1774

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1779

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1780

'upload_date': '20150811',

1781

'uploader': 'FlixMatrix',

1782

'uploader_id': 'FlixMatrixKaravan',

1783

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1784

'license': 'Standard YouTube License',

1785

},

1786

'params': {

1787

'skip_download': True,

1788

},

1789

'skip': 'This video is not available.',

1790

},

1791

{

1792

# YouTube Red video with episode data

1793

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1798

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1799

'duration': 2085,

1800

'upload_date': '20170118',

1801

'uploader': 'Vsauce',

1802

'uploader_id': 'Vsauce',

1803

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1804

'series': 'Mind Field',

1805

'season_number': 1,

1806

'episode_number': 1,

1807

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1808

'tags': 'count:12',

1809

'view_count': int,

1810

'availability': 'public',

1811

'age_limit': 0,

1812

'channel': 'Vsauce',

1813

'episode': 'Episode 1',

1814

'categories': ['Entertainment'],

1815

'season': 'Season 1',

1816

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1817

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1818

'like_count': int,

1819

'playable_in_embed': True,

1820

'live_status': 'not_live',

1821

'channel_follower_count': int

1822

},

1823

'params': {

1824

'skip_download': True,

1825

},

1826

'expected_warnings': [

1827

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1832

# as inappropriate or offensive to some audiences.

1833

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1838

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1839

'duration': 965,

1840

'upload_date': '20140124',

1841

'uploader': 'New Century Foundation',

1842

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1843

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1844

},

1845

'params': {

1846

'skip_download': True,

1847

},

1848

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1853

'only_matching': True,

1854

},

1855

{

1856

# geo restricted to JP

1857

'url': 'sJL6WA-aGkQ',

1858

'only_matching': True,

1859

},

1860

{

1861

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1862

'only_matching': True,

1863

},

1864

{

1865

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1866

'only_matching': True,

1867

},

1868

{

1869

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1870

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1871

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1876

'only_matching': True,

1877

},

1878

{

1879

# Video with unsupported adaptive stream type formats

1880

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1885

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1886

'duration': 433,

1887

'upload_date': '20130923',

1888

'uploader': 'Amelia Putri Harwita',

1889

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1890

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1891

'formats': 'maxcount:10',

1892

},

1893

'params': {

1894

'skip_download': True,

1895

'youtube_include_dash_manifest': False,

1896

},

1897

'skip': 'not actual anymore',

1898

},

1899

{

1900

# Youtube Music Auto-generated description

1901

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1906

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1907

'upload_date': '20190312',

1908

'uploader': 'Stephen - Topic',

1909

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1910

'artist': 'Stephen',

1911

'track': 'Voyeur Girl',

1912

'album': 'it\'s too much love to know my dear',

1913

'release_date': '20190313',

1914

'release_year': 2019,

1915

'alt_title': 'Voyeur Girl',

1916

'view_count': int,

1917

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1918

'playable_in_embed': True,

1919

'like_count': int,

1920

'categories': ['Music'],

1921

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1922

'channel': 'Stephen',

1923

'availability': 'public',

1924

'creator': 'Stephen',

1925

'duration': 169,

1926

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1927

'age_limit': 0,

1928

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1929

'tags': 'count:11',

1930

'live_status': 'not_live',

1931

'channel_follower_count': int

1932

},

1933

'params': {

1934

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1939

'only_matching': True,

1940

},

1941

{

1942

# invalid -> valid video id redirection

1943

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1948

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1949

'upload_date': '20090125',

1950

'uploader': 'Prochorowka',

1951

'uploader_id': 'Prochorowka',

1952

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1953

'artist': 'Panjabi MC',

1954

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1955

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1956

},

1957

'params': {

1958

'skip_download': True,

1959

},

1960

'skip': 'Video unavailable',

1961

},

1962

{

1963

# empty description results in an empty string

1964

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1971

'uploader_id': 'ElevageOrVert',

1972

'uploader': 'ElevageOrVert',

1973

'view_count': int,

1974

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1975

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1976

'like_count': int,

1977

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1978

'tags': [],

1979

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1980

'availability': 'public',

1981

'age_limit': 0,

1982

'categories': ['Pets & Animals'],

1983

'duration': 7,

1984

'playable_in_embed': True,

1985

'live_status': 'not_live',

1986

'channel': 'ElevageOrVert',

1987

'channel_follower_count': int

1988

},

1989

'params': {

1990

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1995

# see [2] for an example with '};' inside ytInitialPlayerResponse

1996

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1997

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1998

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

2003

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

2004

'upload_date': '20130831',

2005

'uploader_id': 'kudvenkat',

2006

'uploader': 'kudvenkat',

2007

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

2008

'like_count': int,

2009

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

2010

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

2011

'live_status': 'not_live',

2012

'categories': ['Education'],

2013

'availability': 'public',

2014

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

2015

'tags': 'count:12',

2016

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

2021

'comment_count': int,

2022

'channel_follower_count': int

2023

},

2024

'params': {

2025

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

2030

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

2031

'only_matching': True,

2032

},

2033

{

2034

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

2035

'only_matching': True,

2036

},

2037

{

2038

# https://github.com/ytdl-org/youtube-dl/pull/28094

2039

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

2045

'upload_date': '20141120',

2046

'uploader': 'The Cinematic Orchestra - Topic',

2047

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2048

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2049

'artist': 'The Cinematic Orchestra',

2050

'track': 'Burn Out',

2051

'album': 'Every Day',

2052

'like_count': int,

2053

'live_status': 'not_live',

2054

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2059

'creator': 'The Cinematic Orchestra',

2060

'channel': 'The Cinematic Orchestra',

2061

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

2062

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2063

'availability': 'public',

2064

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

2065

'categories': ['Music'],

2066

'playable_in_embed': True,

2067

'channel_follower_count': int

2068

},

2069

'params': {

2070

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

2075

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

2076

'only_matching': True,

2077

},

2078

{

2079

# controversial video, requires bpctr/contentCheckOk

2080

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

2085

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

2086

'uploader': 'CBS Mornings',

2087

'uploader_id': 'CBSThisMorning',

2088

'upload_date': '20140716',

2089

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

2090

'duration': 170,

2091

'categories': ['News & Politics'],

2092

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2093

'view_count': int,

2094

'channel': 'CBS Mornings',

2095

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2096

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2097

'age_limit': 18,

2098

'availability': 'needs_auth',

2099

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2100

'like_count': int,

2101

'live_status': 'not_live',

2102

'playable_in_embed': True,

2103

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2108

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2113

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2114

'upload_date': '20201120',

2115

'uploader': 'Walk around Japan',

2116

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2117

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2118

'duration': 1456,

2119

'categories': ['Travel & Events'],

2120

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2121

'view_count': int,

2122

'channel': 'Walk around Japan',

2123

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2124

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2125

'age_limit': 0,

2126

'availability': 'public',

2127

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2128

'live_status': 'not_live',

2129

'playable_in_embed': True,

2130

'channel_follower_count': int

2131

},

2132

'params': {

2133

'skip_download': True,

2134

},

2135

}, {

2136

# Has multiple audio streams

2137

'url': 'WaOKSUlf4TM',

2138

'only_matching': True

2139

}, {

2140

# Requires Premium: has format 141 when requested using YTM url

2141

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2142

'only_matching': True

2143

}, {

2144

# multiple subtitles with same lang_code

2145

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2146

'only_matching': True,

2147

}, {

2148

# Force use android client fallback

2149

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2150

'info_dict': {

2151

'id': 'YOelRv7fMxY',

2152

'title': 'DIGGING A SECRET TUNNEL Part 1',

2153

'ext': '3gp',

2154

'upload_date': '20210624',

2155

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2156

'uploader': 'colinfurze',

2157

'uploader_id': 'colinfurze',

2158

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2159

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2160

'duration': 596,

2161

'categories': ['Entertainment'],

2162

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2163

'view_count': int,

2164

'channel': 'colinfurze',

2165

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2166

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2167

'age_limit': 0,

2168

'availability': 'public',

2169

'like_count': int,

2170

'live_status': 'not_live',

2171

'playable_in_embed': True,

2172

'channel_follower_count': int

2173

},

2174

'params': {

2175

'format': '17', # 3gp format available on android

2176

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2181

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2182

'only_matching': True,

2183

'params': {

2184

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2189

'only_matching': True,

2190

}, {

2191

'note': 'Storyboards',

2192

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2198

'uploader_id': 'scishow',

2199

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2200

'upload_date': '20140324',

2201

'uploader': 'SciShow',

2202

'like_count': int,

2203

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2204

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2205

'view_count': int,

2206

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2207

'playable_in_embed': True,

2208

'tags': 'count:12',

2209

'uploader_url': 'http://www.youtube.com/user/scishow',

2210

'availability': 'public',

2211

'channel': 'SciShow',

2212

'live_status': 'not_live',

2213

'duration': 248,

2214

'categories': ['Education'],

2215

'age_limit': 0,

2216

'channel_follower_count': int

2217

}, 'params': {'format': 'mhtml', 'skip_download': True}

2218

}, {

2219

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2220

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2225

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2226

'uploader': 'Leon Nguyen',

2227

'uploader_id': 'VNSXIII',

2228

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2229

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2230

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2235

'tags': 'count:23',

2236

'playable_in_embed': True,

2237

'live_status': 'not_live',

2238

'upload_date': '20220103',

2239

'like_count': int,

2240

'availability': 'public',

2241

'channel': 'Leon Nguyen',

2242

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2243

'comment_count': int,

2244

'channel_follower_count': int

2245

}

2246

}, {

2247

# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date

2248

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2253

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2254

'uploader': 'Leon Nguyen',

2255

'uploader_id': 'VNSXIII',

2256

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2257

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2258

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2263

'tags': 'count:23',

2264

'playable_in_embed': True,

2265

'live_status': 'not_live',

2266

'upload_date': '20220102',

2267

'like_count': int,

2268

'availability': 'public',

2269

'channel': 'Leon Nguyen',

2270

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2271

'comment_count': int,

2272

'channel_follower_count': int

2273

},

2274

'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}

2275

}, {

2276

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2277

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2282

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2283

'uploader': 'Quackity',

2284

'uploader_id': 'QuackityHQ',

2285

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2286

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2287

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2292

'tags': 'count:26',

2293

'playable_in_embed': True,

2294

'live_status': 'not_live',

2295

'release_timestamp': 1641172509,

2296

'release_date': '20220103',

2297

'upload_date': '20220103',

2298

'like_count': int,

2299

'availability': 'public',

2300

'channel': 'Quackity',

2301

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2302

'channel_follower_count': int

2303

}

2304

},

2305

{ # continuous livestream. Microformat upload date should be preferred.

2306

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2307

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2308

'info_dict': {

2309

'id': 'kgx4WGK0oNU',

2310

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2311

'ext': 'mp4',

2312

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2313

'availability': 'public',

2314

'age_limit': 0,

2315

'release_timestamp': 1637975704,

2316

'upload_date': '20210619',

2317

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2318

'live_status': 'is_live',

2319

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2320

'uploader': '阿鲍Abao',

2321

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2322

'channel': 'Abao in Tokyo',

2323

'channel_follower_count': int,

2324

'release_date': '20211127',

2325

'tags': 'count:39',

2326

'categories': ['People & Blogs'],

2327

'like_count': int,

2328

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2329

'view_count': int,

2330

'playable_in_embed': True,

2331

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2332

'concurrent_view_count': int,

2333

},

2334

'params': {'skip_download': True}

2335

}, {

2336

# Story. Requires specific player params to work.

2337

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2342

'view_count': int,

2343

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2344

'upload_date': '20220526',

2345

'categories': ['Education'],

2346

'title': 'Story',

2347

'channel': 'IT\'S HISTORY',

2348

'description': '',

2349

'uploader_id': 'BlastfromthePast',

2350

'duration': 12,

2351

'uploader': 'IT\'S HISTORY',

2352

'playable_in_embed': True,

2353

'age_limit': 0,

2354

'live_status': 'not_live',

2355

'tags': [],

2356

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2357

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2358

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2359

},

2360

'skip': 'stories get removed after some period of time',

2361

}, {

2362

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2367

'upload_date': '20220323',

2368

'like_count': int,

2369

'availability': 'unlisted',

2370

'channel': 'nao20010128nao',

2371

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2372

'age_limit': 0,

2373

'uploader': 'nao20010128nao',

2374

'uploader_id': 'nao20010128nao',

2375

'categories': ['Music'],

2376

'view_count': int,

2377

'description': '',

2378

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2379

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2380

'live_status': 'not_live',

2381

'playable_in_embed': True,

2382

'channel_follower_count': int,

2383

'duration': 6,

2384

'tags': [],

2385

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

2386

}

2387

}, {

2388

# Prefer primary title+description language metadata by default

2389

# Do not prefer translated description if primary is empty

2390

'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',

'info_dict': {

'id': 'el3E4MbxRqQ',

'ext': 'mp4',

'title': 'dlp test video 2 - primary sv no desc',

2395

'description': '',

2396

'channel': 'cole-dlp-test-acc',

2397

'tags': [],

2398

'view_count': int,

2399

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2400

'like_count': int,

2401

'playable_in_embed': True,

2402

'availability': 'unlisted',

2403

'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',

2404

'age_limit': 0,

2405

'duration': 5,

2406

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2407

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2408

'live_status': 'not_live',

2409

'upload_date': '20220908',

2410

'categories': ['People & Blogs'],

2411

'uploader': 'cole-dlp-test-acc',

2412

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2413

},

2414

'params': {'skip_download': True}

2415

}, {

2416

# Extractor argument: prefer translated title+description

2417

'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',

'info_dict': {

'id': 'gHKT4uU8Zng',

'ext': 'mp4',

'channel': 'cole-dlp-test-acc',

2422

'tags': [],

2423

'duration': 5,

2424

'live_status': 'not_live',

2425

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2426

'upload_date': '20220728',

2427

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2428

'view_count': int,

2429

'categories': ['People & Blogs'],

2430

'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',

2431

'title': 'dlp test video title translated (fr)',

2432

'availability': 'public',

2433

'uploader': 'cole-dlp-test-acc',

2434

'age_limit': 0,

2435

'description': 'dlp test video description translated (fr)',

2436

'playable_in_embed': True,

2437

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2438

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2439

},

2440

'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},

2441

'expected_warnings': [r'Preferring "fr" translated fields'],

2442

}, {

2443

'note': '6 channel audio',

2444

'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',

2445

'only_matching': True,

}

]

# Test cases keyed on third-party pages: each entry gives a page URL and the
# metadata expected for the video found on that page.
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'uploader': 'Christopher Sykes',
            'uploader_id': 'ChristopherJSykes',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        },
        'params': {
            'skip_download': True,
        }
    },
]

@classmethod
def suitable(cls, url):
    """Decide whether this extractor should handle ``url``.

    Returns False when the URL's query string carries a non-empty ``list``
    parameter; otherwise defers to the parent class's ``suitable``.
    """
    # NOTE(review): the import is function-local in the original as well;
    # presumably so the method body stays self-contained - confirm before
    # hoisting it to module level.
    from ..utils import parse_qs

    qs = parse_qs(url)
    if qs.get('list', [None])[0]:
        return False
    return super().suitable(url)

2493

2494

def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create two empty per-instance caches.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser.
    """
    super().__init__(*args, **kwargs)
    # Both caches start empty; they are populated elsewhere in the class.
    self._code_cache = {}
    self._player_cache = {}

2498

2499

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach a fragment source to every format flagged ``is_from_start``.

    Only the format dicts whose ``is_from_start`` value is truthy are touched;
    they are mutated in place, so the caller sees the changes through its own
    list. Nothing is returned.

    For each such format:
      * ``is_live`` is set to the ``is_live`` argument;
      * while live, ``fragments`` becomes a partial of
        ``self._live_dash_fragments`` (called later with a context dict) and
        ``protocol`` becomes ``'http_dash_segments_generator'``;
      * otherwise, ``fragments`` becomes a ``LazyList`` over that generator
        invoked with an empty context, and ``is_from_start`` is removed.

    ``url``, ``smuggled_data`` and ``webpage_url`` are only used when the
    manifest has to be re-fetched; ``live_start_time`` is passed through to
    ``self._live_dash_fragments``.
    """
    lock = threading.Lock()
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Rebuild ``formats`` once more than ``delay`` seconds have passed since the last fetch."""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        # Only the refresh itself is serialised; the lookup below runs unlocked.
        with lock:
            refetch_manifest(format_id, delay)

        f = next((f for f in formats if f['format_id'] == format_id), None)
        if not f:
            if not is_live:
                self.to_screen(f'{video_id}: Video is no longer live')
            else:
                self.report_warning(
                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
            return None
        return f['manifest_url'], f['manifest_stream_number'], is_live

    for f in formats:
        f['is_live'] = is_live
        # The last argument is False while live, else a snapshot of the
        # format dict taken before the mutations below.
        gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
                                live_start_time, mpd_feed, not is_live and f.copy())
        if is_live:
            f['fragments'] = gen
            f['protocol'] = 'http_dash_segments_generator'
        else:
            f['fragments'] = LazyList(gen({}))
            # NOTE(review): indentation was lost in the copy this block was
            # restored from; this `del` is placed in the not-live branch -
            # confirm against the canonical file.
            del f['is_from_start']

2546

2547

def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):

2548

FETCH_SPAN, MAX_DURATION = 5, 432000

2549

2550

mpd_url, stream_number, is_live = None, None, True

2551

2552

begin_index = 0

2553

download_start_time = ctx.get('start') or time.time()

2554

2555

lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION

2556

if lack_early_segments:

2557

self.report_warning(bug_reports_message(

2558

'Starting download from the last 120 hours of the live stream since '

2559

'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

2560

lack_early_segments = True

2561

2562

known_idx, no_fragment_score, last_segment_url = begin_index, 0, None

2563

fragments, fragment_base_url = None, None

2564

2565

def _extract_sequence_from_mpd(refresh_sequence, immediate):

2566

nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url

2567

# Obtain from MPD's maximum seq value

2568

old_mpd_url = mpd_url

2569

last_error = ctx.pop('last_error', None)

2570

expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403

2571

mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)

2572

or (mpd_url, stream_number, False))

2573

if not refresh_sequence:

2574

if expire_fast and not is_live:

2575

return False, last_seq

2576

elif old_mpd_url == mpd_url:

2577

return True, last_seq

2578

if manifestless_orig_fmt:

2579

fmt_info = manifestless_orig_fmt

2580

else:

2581

try:

2582

fmts, _ = self._extract_mpd_formats_and_subtitles(

2583

mpd_url, None, note=False, errnote=False, fatal=False)

2584

except ExtractorError:

2585

fmts = None

2586

if not fmts:

2587

no_fragment_score += 2

2588

return False, last_seq

2589

fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)

2590

fragments = fmt_info['fragments']

2591

fragment_base_url = fmt_info['fragment_base_url']

2592

assert fragment_base_url

2593

2594

_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))

2595

return True, _last_seq

2596

2597

self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')

2598

while is_live:

2599

fetch_time = time.time()

2600

if no_fragment_score > 30:

2601

return

2602

if last_segment_url:

2603

# Obtain from "X-Head-Seqnum" header value from each segment

2604

try:

2605

urlh = self._request_webpage(

2606

last_segment_url, None, note=False, errnote=False, fatal=False)

2607

except ExtractorError:

2608

urlh = None

2609

last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))

2610

if last_seq is None:

2611

no_fragment_score += 2

2612

last_segment_url = None

2613

continue

2614

else:

2615

should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)

2616

no_fragment_score += 2

2617

if not should_continue:

2618

continue

2619

2620

if known_idx > last_seq:

2621

last_segment_url = None

continue

last_seq += 1

if begin_index < 0 and known_idx < 0:

2627

# skip from the start when it's negative value

2628

known_idx = last_seq + begin_index

2629

if lack_early_segments:

2630

known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))

2631

try:

2632

for idx in range(known_idx, last_seq):

2633

# do not update sequence here or you'll get skipped some part of it

2634

should_continue, _ = _extract_sequence_from_mpd(False, False)

2635

if not should_continue:

2636

known_idx = idx - 1

2637

raise ExtractorError('breaking out of outer loop')

2638

last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)

2639

yield {

2640

'url': last_segment_url,

2641

'fragment_count': last_seq,

2642

}

2643

if known_idx == last_seq:

2644

no_fragment_score += 5

2645

else:

2646

no_fragment_score = 0

2647

known_idx = last_seq

2648

except ExtractorError:

2649

continue

2650

2651

if manifestless_orig_fmt:

2652

# Stop at the first iteration if running for post-live manifestless;

2653

# fragment count no longer increase since it starts

2654

break

2655

2656

time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2657

2658

def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the player JS URL found in the given ytcfgs joined onto
    https://www.youtube.com, or None when no URL is found

    'PLAYER_JS_URL' and the 'jsUrl' of 'WEB_PLAYER_CONTEXT_CONFIGS' are the keys searched.
    The webpage argument is accepted but not used here.
    """
    found_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    if found_url:
        return urljoin('https://www.youtube.com', found_url)
    return None

2665

2666

def _download_player_url(self, video_id, fatal=False):

2667

res = self._download_webpage(

2668

'https://www.youtube.com/iframe_api',

2669

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2670

if res:

2671

player_version = self._search_regex(

2672

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2673

if player_version:

2674

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2675

2676

def _signature_cache_id(self, example_sig):

2677

""" Return a string representation of a signature """

2678

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2679

2680

@classmethod

2681

def _extract_player_info(cls, player_url):

2682

for player_re in cls._PLAYER_INFO_RE:

2683

id_m = re.search(player_re, player_url)

if id_m:

break

else:

raise ExtractorError('Cannot identify player %r' % player_url)

2688

return id_m.group('id')

2689

2690

def _load_player(self, video_id, player_url, fatal=True):

2691

player_id = self._extract_player_info(player_url)

2692

if player_id not in self._code_cache:

2693

code = self._download_webpage(

2694

player_url, video_id, fatal=fatal,

2695

note='Downloading player ' + player_id,

2696

errnote='Download of %s failed' % player_url)

2697

if code:

2698

self._code_cache[player_id] = code

2699

return self._code_cache.get(player_id)

2700

2701

def _extract_signature_function(self, video_id, player_url, example_sig):

2702

player_id = self._extract_player_info(player_url)

2703

2704

# Read from filesystem cache

2705

func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'

2706

assert os.path.basename(func_id) == func_id

2707

2708

self.write_debug(f'Extracting signature function {func_id}')

2709

cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None

2710

2711

if not cache_spec:

2712

code = self._load_player(video_id, player_url)

2713

if code:

2714

res = self._parse_sig_js(code)

2715

test_string = ''.join(map(chr, range(len(example_sig))))

2716

cache_spec = [ord(c) for c in res(test_string)]

2717

self.cache.store('youtube-sigfuncs', func_id, cache_spec)

2718

2719

return lambda s: ''.join(s[i] for i in cache_spec)

2720

2721

def _print_sig_code(self, func, example_sig):

2722

if not self.get_param('youtube_print_sig_code'):

2723

return

2724

2725

def gen_sig_code(idxs):

2726

def _genslice(start, end, step):

2727

starts = '' if start == 0 else str(start)

2728

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2729

steps = '' if step == 1 else (':%d' % step)

2730

return f's[{starts}{ends}{steps}]'

2731

2732

step = None

2733

# Quelch pyflakes warnings - start will be set when step is set

2734

start = '(Never used)'

2735

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2740

step = None

2741

continue

2742

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2752

2753

test_string = ''.join(map(chr, range(len(example_sig))))

2754

cache_res = func(test_string)

2755

cache_spec = [ord(c) for c in cache_res]

2756

expr_code = ' + '.join(gen_sig_code(cache_spec))

2757

signature_id_tuple = '(%s)' % (

2758

', '.join(str(len(p)) for p in example_sig.split('.')))

2759

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2760

' return %s\n') % (signature_id_tuple, expr_code)

2761

self.to_screen('Extracted signature function:\n' + code)

2762

2763

def _parse_sig_js(self, jscode):
    """
    Find the signature function in the player JS and return a callable for it

    The name is taken from the 'sig' group of the patterns below. The returned
    callable takes a signature string and passes it to the JS function as its
    only argument.
    """
    # NB: literal parentheses must stay escaped as \( and \); a bare "$" outside a
    # character class is an end anchor and makes the rest of the pattern unmatchable
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])

2786

2787

def _cached(self, func, *cache_id):

2788

def inner(*args, **kwargs):

2789

if cache_id not in self._player_cache:

2790

try:

2791

self._player_cache[cache_id] = func(*args, **kwargs)

2792

except ExtractorError as e:

2793

self._player_cache[cache_id] = e

2794

except Exception as e:

2795

self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)

2796

2797

ret = self._player_cache[cache_id]

2798

if isinstance(ret, Exception):

raise ret

return ret

return inner

def _decrypt_signature(self, s, video_id, player_url):

2804

"""Turn the encrypted s field into a working signature"""

2805

extract_sig = self._cached(

2806

self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))

2807

func = extract_sig(video_id, player_url, s)

2808

self._print_sig_code(func, s)

2809

return func(s)

2810

2811

def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into a working signature

    The nsig function is run with the native JS interpreter first. If that
    raises JSInterpreter.Exception, PhantomJS is tried instead; when the
    PhantomJS wrapper cannot be created, the interpreter error is re-raised.
    Raises ExtractorError when player_url is None or the function code cannot
    be extracted.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as err:
        raise ExtractorError('Unable to extract nsig function code', cause=err)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = extract_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as native_error:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # No PhantomJS fallback available; surface the interpreter failure
            raise native_error
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error)

        arg_names, func_body = func_code
        ret = jsi.execute(
            f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));',
            video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret

2844

2845

def _extract_n_function_name(self, jscode):

2846

funcname, idx = self._search_regex(

2847

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

2848

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

if not idx:

return funcname

return json.loads(js_to_json(self._search_regex(

2853

rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,

2854

f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]

2855

2856

def _extract_n_function_code(self, video_id, player_url):
    """
    Return (jsi, player_id, func_code) for the nsig function of the player

    func_code is (list of argument names, function body). It is read from the
    'youtube-nsig' cache when present; otherwise it is extracted from the
    player JS (by regex, falling back to the JS interpreter) and stored.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # func_name may contain "$", so it is escaped like in _extract_n_function_name.
    # NB: literal parentheses must stay escaped as \( and \)
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
                 # NB: The end of the regex is intentionally kept strict
                 {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code

2881

2882

def _extract_n_function_from_code(self, jsi, func_code):

2883

func = jsi.extract_function_from_code(*func_code)

def extract_nsig(s):

try:

ret = func([s])

except JSInterpreter.Exception:

2889

raise

2890

except Exception as e:

2891

raise JSInterpreter.Exception(traceback.format_exc(), cause=e)

2892

2893

if ret.startswith('enhanced_except_'):

2894

raise JSInterpreter.Exception('Signature function returned an exception')

return ret

return extract_nsig

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ytcfg['STS'] is used when available; otherwise the value is searched for
    in the player JS. Without a player_url this raises ExtractorError if fatal
    is set, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))

2922

2923

def _mark_watched(self, video_id, player_responses):
    """
    Request the playback tracking URLs of the video

    'videostatsPlaybackUrl' is requested first, then 'videostatsWatchtimeUrl'
    (logged as "fully watched"). Stops with a warning at the first URL that is
    missing from the player responses. Failed requests are not fatal.
    """
    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        # Pick uniformly: masking randint(0, 256) over-selects index 0, since randint includes 256
        cpn = ''.join(random.choices(CPN_ALPHABET, k=16))

        # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        })

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs.update({
                'st': video_length,
                'et': video_length,
            })

        url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

2963

2964

@classmethod

2965

def _extract_from_webpage(cls, url, webpage):

2966

# Invidious Instances

2967

# https://github.com/yt-dlp/yt-dlp/issues/195

2968

# https://github.com/iv-org/invidious/pull/1730

2969

mobj = re.search(

2970

r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',

2971

webpage)

2972

if mobj:

2973

yield cls.url_result(mobj.group('url'), cls)

2974

raise cls.StopExtraction()

2975

2976

yield from super()._extract_from_webpage(url, webpage)

2977

2978

# lazyYT YouTube embed

2979

for id_ in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):

2980

yield cls.url_result(unescapeHTML(id_), cls, id_)

2981

2982

# Wordpress "YouTube Video Importer" plugin

2983

for m in re.findall(r'''(?x)<div[^>]+

2984

class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+

2985

data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):

2986

yield cls.url_result(m[-1], cls, m[-1])

2987

2988

@classmethod
def extract_id(cls, url):
    """Return the id given by cls.get_temp_id for url; raise ExtractorError if there is none"""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

2994

2995

def _extract_chapters_from_json(self, data, duration):
    """
    Extract chapters from the chaptered player bar in data

    Start times are read from 'timeRangeStartMillis' and scaled by 1000;
    titles come from the chapter's 'simpleText' title.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_time_of(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_time_of,
        chapter_title=title_of,
        duration=duration)

3009

3010

def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Extract chapters from the macro markers lists of the engagement panels

    Returns the chapters of the first list that yields any, or [] if none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

3022

3023

def _extract_chapters_from_description(self, description, duration):

3024

return self._extract_chapters(

3025

re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),

3026

chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],

3027

duration=duration, strict=False)

3028

3029

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

3034

'title': chapter_title(chapter),

3035

} for chapter in chapter_list or []]

3036

if not strict:

3037

chapter_list.sort(key=lambda c: c['start_time'] or 0)

3038

3039

chapters = [{'start_time': 0}]

3040

for idx, chapter in enumerate(chapter_list):

3041

if chapter['start_time'] is None:

3042

self.report_warning(f'Incomplete chapter {idx}')

3043

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

3044

chapters.append(chapter)

3045

elif chapter not in chapters:

3046

self.report_warning(

3047

f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')

3048

return chapters[1:]

3049

3050

def _extract_comment(self, comment_renderer, parent=None):

3051

comment_id = comment_renderer.get('commentId')

if not comment_id:

return

text = self._get_text(comment_renderer, 'contentText')

3056

3057

# Timestamp is an estimate calculated from the current time and time_text

3058

time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''

3059

timestamp = self._parse_time_text(time_text)

3060

3061

author = self._get_text(comment_renderer, 'authorText')

3062

author_id = try_get(comment_renderer,

3063

lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

3064

3065

votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],

3066

lambda x: x['likeCount']), str)) or 0

3067

author_thumbnail = try_get(comment_renderer,

3068

lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)

3069

3070

author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)

3071

is_favorited = 'creatorHeart' in (try_get(

3072

comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})

return {

'id': comment_id,

'text': text,

'timestamp': timestamp,

3077

'time_text': time_text,

3078

'like_count': votes,

3079

'is_favorited': is_favorited,

3080

'author': author,

3081

'author_id': author_id,

3082

'author_thumbnail': author_thumbnail,

3083

'author_is_uploader': author_is_uploader,

3084

'parent': parent or 'root'

3085

}

3086

3087

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generate comment dicts by following the continuations of a comment section

    Called without parent for the comment section itself; it calls itself with
    parent set to a comment id to fetch that comment's replies, passing along
    the same tracker so the counters are shared.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Return the continuation of the requested sort order, if the header has one"""
        _continuation = None
        for content in contents:
            header = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(header, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                header,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments in contents, each followed by its replies"""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # Falsy entry: the loop consuming this generator returns when it sees one
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if replies_renderer:
                tracker['current_page_thread'] += 1
                reply_entries = self._comment_entries(
                    replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                yield from itertools.islice(reply_entries, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Limits missing from the max_comments extractor argument default to sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section of the first response is treated as the header
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

3232

3233

@staticmethod

3234

def _generate_comment_continuation(video_id):

3235

"""

3236

Generates initial comment section continuation token from given video id

3237

"""

3238

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

3239

return base64.b64encode(token.encode()).decode()

3240

3241

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Returns an iterator over the comments of the 'comment-item-section'
    found in contents, cut off at the first value of the max_comments
    extractor argument. The webpage argument is not used here.
    """
    def _real_comment_extract(contents):
        sections = traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
        renderer = None
        for section in sections:
            if section.get('sectionIdentifier') == 'comment-item-section':
                renderer = section
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, limit)

3251

3252

@staticmethod

3253

def _get_checkok_params():

3254

return {'contentCheckOk': True, 'racyCheckOk': True}

3255

3256

@classmethod

3257

def _generate_player_context(cls, sts=None):

3258

context = {

3259

'html5Preference': 'HTML5_PREF_WANTS',

3260

}

3261

if sts is not None:

3262

context['signatureTimestamp'] = sts

3263

return {

3264

'playbackContext': {

3265

'contentPlaybackContext': context

3266

},

3267

**cls._get_checkok_params()

}

@staticmethod
def _is_agegated(player_response):
    """
    Tell whether the playability status of player_response indicates an age gate

    True when 'desktopLegacyAgeGateReason' is set, or when the status or the
    reason contains one of the known age-gate phrases.
    """
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )

    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False

3281

3282

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of player_response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3285

3286

# 'params' value that _extract_player_response adds to the player query
# for story videos and for the android client
_STORY_PLAYER_PARAMS = '8AEB'

3287

3288

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):

3289

3290

session_index = self._extract_session_index(player_ytcfg, master_ytcfg)

3291

syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

3292

sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None

3293

headers = self.generate_api_headers(

3294

ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

yt_query = {

'videoId': video_id,

}

if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':

3300

yt_query['params'] = self._STORY_PLAYER_PARAMS

3301

3302

yt_query.update(self._generate_player_context(sts))

3303

return self._extract_response(

3304

item_id=video_id, ep='player', query=yt_query,

3305

ytcfg=player_ytcfg, headers=headers, fatal=True,

3306

default_client=client,

3307

note='Downloading %s player API JSON' % client.replace('_', ' ').strip()

3308

) or None

3309

3310

def _get_requested_clients(self, url, smuggled_data):
    """
    Return the de-duplicated list of innertube clients to use for url

    Clients come from the player_client extractor argument ('default' and
    'all' are expanded, unknown names are skipped with a warning); without
    any, the defaults are used. For music URLs the existing '<client>_music'
    variants are added.
    """
    default = ['android', 'web']
    # Clients whose name starts with _ cannot be requested; highest priority first
    public_clients = (name for name in INNERTUBE_CLIENTS.keys() if name[:1] != '_')
    allowed_clients = sorted(
        public_clients,
        key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        requested_clients.extend(
            f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)

    return orderedSet(requested_clients)

3333

3334

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect the player responses for ``video_id`` from the given clients.

    ``clients`` is worked through in the given order. While looping, extra
    clients may be queued (and are then tried next): a ``*_creator`` client
    when an ``embedded`` variant returns an unplayable response and the
    session is authenticated, or embedded variants when a response is
    age-gated.

    If an initial player response can be found in ``webpage``, a copy of it
    with ``streamingData`` set to ``None`` is always placed first in the
    result, and the ``web`` client reuses the initial response instead of
    making a request.

    Returns a ``(player_responses, player_url)`` tuple.

    An ``ExtractorError`` from a client is not fatal by itself: it is reported
    as a warning, except that the last such error is re-raised when no player
    response at all could be collected.
    """
    initial_pr = self._search_json(
        self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response',
        video_id, fatal=False) if webpage else None

    known_clients = set(clients)
    # Reversed so that ``pop()`` hands out the clients in their original order
    pending_clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for candidate in client_names:
            actual_client = _split_innertube_client(candidate)[0]
            if actual_client in INNERTUBE_CLIENTS and actual_client not in known_clients:
                pending_clients.append(candidate)
                known_clients.add(actual_client)
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        prs.append({**initial_pr, 'streamingData': None})

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending_clients:
        client, base_client, variant = _split_innertube_client(pending_clients.pop())

        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # Once a player URL is known it is reused for the remaining clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The dedicated player URL download is attempted at most once per call
        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as error:
            # Only the most recent error is held back; older ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = error
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3415

3416

def _needs_live_processing(self, live_status, duration):
    """Tell whether the video needs the live-from-start handling.

    Returns ``live_status`` itself when either

    * the status is ``'is_live'`` and the ``live_from_start`` param is set, or
    * the status is ``'post_live'`` and ``duration`` exceeds 4 hours
      (a missing duration counts as 0);

    otherwise returns ``None``.
    """
    if live_status == 'is_live' and self.get_param('live_from_start'):
        return live_status
    if live_status == 'post_live' and (duration or 0) > 4 * 3600:
        return live_status
    return None

3420

3421

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generate the formats described by ``streaming_data``, then the subtitles.

    This is a generator. It yields, in this order:

    1. one format dict for every usable entry found under ``formats`` /
       ``adaptiveFormats`` of each streaming data item;
    2. for each streaming data item, the formats of its HLS and DASH
       manifests, unless that manifest type is skipped;
    3. as the very last item, the subtitles dict merged from the manifests.

    Callers therefore have to split off the final item from the formats.

    ``player_url`` is needed to decrypt ciphered signatures and the ``n``
    query parameter. Formats whose signature cannot be decrypted are dropped;
    formats whose ``n`` parameter cannot be decrypted are kept but marked as
    throttled. ``live_status`` and ``duration`` decide which manifests are
    skipped and whether DASH formats get ``is_from_start`` set.
    """
    itags, stream_ids = {}, set()
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may occur once per audio track, so both identify a stream
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # ``audioQuality`` may be missing or null; fall back to ``quality``
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            # Remembered so that manifest formats can be given a quality later
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is still offered, just flagged and deprioritized
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.add(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # ``quality`` is None when the entry carries no quality information at all
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is that stream's bitrate
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, itag):
        """ Assign format_id and quality to a manifest format; False means it is a duplicate """
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen via another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Unknown itag: borrow the quality of the closest known height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The subtitles are always the final item produced by this generator
    yield subtitles

def _extract_storyboard(self, player_responses, duration):
    """Generate storyboard formats from the storyboard spec of the player responses.

    The spec is a ``|``-separated string: its first segment is the base URL
    (resolved against ``https://i.ytimg.com/``), every further segment
    describes one storyboard as 8 ``#``-separated fields. The first five
    fields must be non-zero integers (width, height, frame count, columns,
    rows); the last two fill the ``$N`` placeholder and the ``sigh`` query
    parameter. Segments not matching this are skipped with a warning.

    ``duration`` is the video duration used to derive the fps and the
    duration of each fragment. Nothing is yielded when there is no base URL
    or when ``duration`` is missing or zero, since those values could not be
    computed.

    Yields one format dict (``ext``/``protocol`` ``mhtml``) per storyboard.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so ``pop()`` removes the first segment: the base URL
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url or not duration:
        return
    last_level = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        name, sigh = args[6:]

        # ``spec`` is iterated in reverse, hence the level counts down from the last one
        url = base_url.replace('$L', str(last_level - i)).replace('$N', name) + f'&sigh={sigh}'
        # Each fragment holds ``cols * rows`` frames; the last one may be partial
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The final fragment only covers whatever time is left
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3662

3663

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    The webpage is not downloaded when ``webpage`` is listed in the
    ``player_skip`` extractor argument; it is then ``None``. The download is
    non-fatal. For stories, ``_STORY_PLAYER_PARAMS`` is added to the page
    query as ``pp``.

    Returns a ``(webpage, master_ytcfg, player_responses, player_url)`` tuple,
    where ``master_ytcfg`` falls back to the default ytcfg when none can be
    extracted from the webpage.
    """
    if 'webpage' in self._configuration_arg('player_skip'):
        webpage = None
    else:
        page_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            page_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(webpage_url, video_id, fatal=False, query=page_query)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url

3679

3680

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Work out the live status of the video and extract its formats.

    The live status is derived from the ``isLive``, ``isLiveContent``,
    ``isUpcoming`` and ``isPostLiveDvr`` fields of ``video_details``, with
    ``isLiveNow`` from the live broadcast details of ``microformats`` as a
    fallback for ``isLive``. It is one of ``'post_live'``, ``'is_live'``,
    ``'is_upcoming'``, ``'was_live'``, ``'not_live'``, or ``None`` when it
    cannot be decided.

    Returns a tuple
    ``(live_broadcast_details, live_status, streaming_data, formats, subtitles)``
    where ``formats`` is a list and ``subtitles`` is the last item produced by
    ``_extract_formats_and_subtitles``.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')

    # Fill in values that can be concluded from the other flags
    if (is_live is None and is_upcoming) or live_content is False:
        is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    post_live = get_first(video_details, 'isPostLiveDvr')

    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif None in (is_live, is_upcoming, live_content):
        # Not enough information to tell apart "was live" and "not live"
        live_status = None
    elif live_content:
        live_status = 'was_live'
    else:
        live_status = 'not_live'

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator yields every format first and the subtitles dict last
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    return live_broadcast_details, live_status, streaming_data, formats, subtitles

3702

3703

def _real_extract(self, url):

3704

url, smuggled_data = unsmuggle_url(url, {})

3705

video_id = self._match_id(url)

3706

3707

base_url = self.http_scheme() + '//www.youtube.com/'

3708

webpage_url = base_url + 'watch?v=' + video_id

3709

3710

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3711

3712

playability_statuses = traverse_obj(

3713

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3714

3715

trailer_video_id = get_first(

3716

playability_statuses,

3717

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3718

expected_type=str)

3719

if trailer_video_id:

3720

return self.url_result(

3721

trailer_video_id, self.ie_key(), trailer_video_id)

3722

3723

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3724

if webpage else (lambda x: None))

3725

3726

video_details = traverse_obj(

3727

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3728

microformats = traverse_obj(

3729

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3730

expected_type=dict, default=[])

3731

3732

translated_title = self._get_text(microformats, (..., 'title'))

3733

video_title = (self._preferred_lang and translated_title

3734

or get_first(video_details, 'title') # primary

3735

or translated_title

3736

or search_meta(['og:title', 'twitter:title', 'title']))

3737

translated_description = self._get_text(microformats, (..., 'description'))

3738

original_description = get_first(video_details, 'shortDescription')

3739

video_description = (

3740

self._preferred_lang and translated_description

3741

# If original description is blank, it will be an empty string.

3742

# Do not prefer translated description in this case.

3743

or original_description if original_description is not None else translated_description)

3744

3745

multifeed_metadata_list = get_first(

3746

player_responses,

3747

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3748

expected_type=str)

3749

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3750

if self.get_param('noplaylist'):

3751

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3756

# Unquote should take place before split on comma (,) since textual

3757

# fields may contain comma as well (see

3758

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3759

feed_data = urllib.parse.parse_qs(

3760

urllib.parse.unquote_plus(feed))

3761

3762

def feed_entry(name):

3763

return try_get(

3764

feed_data, lambda x: x[name][0], str)

3765

3766

feed_id = feed_entry('id')

3767

if not feed_id:

3768

continue

3769

feed_title = feed_entry('title')

3770

title = video_title

3771

if feed_title:

3772

title += ' (%s)' % feed_title

3773

entries.append({

3774

'_type': 'url_transparent',

3775

'ie_key': 'Youtube',

3776

'url': smuggle_url(

3777

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3778

{'force_singlefeed': True}),

3779

'title': title,

3780

})

3781

feed_ids.append(feed_id)

3782

self.to_screen(

3783

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3784

% (', '.join(feed_ids), video_id))

3785

return self.playlist_result(

3786

entries, video_id, video_title, video_description)

3787

3788

duration = int_or_none(

3789

get_first(video_details, 'lengthSeconds')

3790

or get_first(microformats, 'lengthSeconds')

3791

or parse_duration(search_meta('duration'))) or None

3792

3793

live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \

3794

self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

3795

if live_status == 'post_live':

3796

self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

3797

3798

if not formats:

3799

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3800

self.report_drm(video_id)

3801

pemr = get_first(

3802

playability_statuses,

3803

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3804

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3805

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3806

if subreason:

3807

if subreason == 'The uploader has not made this video available in your country.':

3808

countries = get_first(microformats, 'availableCountries')

3809

if not countries:

3810

regions_allowed = search_meta('regionsAllowed')

3811

countries = regions_allowed.split(',') if regions_allowed else None

3812

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3813

reason += f'. {subreason}'

3814

if reason:

3815

self.raise_no_formats(reason, expected=True)

3816

3817

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3818

if not keywords and webpage:

3819

keywords = [

3820

unescapeHTML(m.group('content'))

3821

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3822

for keyword in keywords:

3823

if keyword.startswith('yt:stretch='):

3824

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3825

if mobj:

3826

# NB: float is intentional for forcing float division

3827

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3832

f['stretched_ratio'] = ratio

3833

break

3834

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3835

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3836

if thumbnail_url:

3837

thumbnails.append({

3838

'url': thumbnail_url,

3839

})

3840

original_thumbnails = thumbnails.copy()

3841

3842

# The best resolution thumbnails sometimes does not appear in the webpage

3843

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3844

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3845

thumbnail_names = [

3846

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

3847

# in resolution, these are not the custom thumbnail. So de-prioritize them

3848

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3849

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3850

]

3851

n_thumbnail_names = len(thumbnail_names)

3852

thumbnails.extend({

3853

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3854

video_id=video_id, name=name, ext=ext,

3855

webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),

3856

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3857

for thumb in thumbnails:

3858

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3859

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3860

self._remove_duplicate_formats(thumbnails)

3861

self._downloader._sort_thumbnails(original_thumbnails)

3862

3863

category = get_first(microformats, 'category') or search_meta('genre')

3864

channel_id = str_or_none(

3865

get_first(video_details, 'channelId')

3866

or get_first(microformats, 'externalChannelId')

3867

or search_meta('channelId'))

3868

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3869

3870

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3871

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3872

if not duration and live_end_time and live_start_time:

3873

duration = live_end_time - live_start_time

3874

3875

needs_live_processing = self._needs_live_processing(live_status, duration)

3876

3877

def is_bad_format(fmt):

3878

if needs_live_processing and not fmt.get('is_from_start'):

3879

return True

3880

elif (live_status == 'is_live' and needs_live_processing != 'is_live'

3881

and fmt.get('protocol') == 'http_dash_segments'):

3882

return True

3883

3884

for fmt in filter(is_bad_format, formats):

3885

fmt['preference'] = (fmt.get('preference') or -1) - 10

3886

fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')

3887

3888

if needs_live_processing:

3889

self._prepare_live_from_start_formats(

3890

formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

3891

3892

formats.extend(self._extract_storyboard(player_responses, duration))

3893

3894

# source_preference is lower for throttled/potentially damaged formats

3895

self._sort_formats(formats, (

3896

'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3901

'formats': formats,

3902

'thumbnails': thumbnails,

3903

# The best thumbnail that we are sure exists. Prevents unnecessary

3904

# URL checking if user don't care about getting the best possible thumbnail

3905

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3906

'description': video_description,

3907

'uploader': get_first(video_details, 'author'),

3908

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3909

'uploader_url': owner_profile_url,

3910

'channel_id': channel_id,

3911

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

3912

'duration': duration,

3913

'view_count': int_or_none(

3914

get_first((video_details, microformats), (..., 'viewCount'))

3915

or search_meta('interactionCount')),

3916

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3917

'age_limit': 18 if (

3918

get_first(microformats, 'isFamilySafe') is False

3919

or search_meta('isFamilyFriendly') == 'false'

3920

or search_meta('og:restrictions:age') == '18+') else 0,

3921

'webpage_url': webpage_url,

3922

'categories': [category] if category else None,

3923

'tags': keywords,

3924

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3925

'live_status': live_status,

3926

'release_timestamp': live_start_time,

}

subtitles = {}

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3931

if pctr:

3932

def get_lang_code(track):

3933

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3934

or track.get('languageCode'))

3935

3936

# Converted into dicts to remove duplicates

3937

captions = {

3938

get_lang_code(sub): sub

3939

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3940

translation_languages = {

3941

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3942

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3943

3944

def process_language(container, base_url, lang_code, sub_name, query):

3945

lang_subs = container.setdefault(lang_code, [])

3946

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

# NB: Constructing the full subtitle dictionary is slow

3957

get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (

3958

self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

3959

for lang_code, caption_track in captions.items():

3960

base_url = caption_track.get('baseUrl')

3961

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3962

if not base_url:

3963

continue

3964

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3965

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3970

if not caption_track.get('isTranslatable'):

3971

continue

3972

for trans_code, trans_name in translation_languages.items():

3973

if not trans_code:

3974

continue

3975

orig_trans_code = trans_code

3976

if caption_track.get('kind') != 'asr':

3977

if not get_translated_subs:

3978

continue

3979

trans_code += f'-{lang_code}'

3980

trans_name += format_field(lang_name, None, ' from %s')

3981

# Add an "-orig" label to the original language so that it can be distinguished.

3982

# The subs are returned without "-orig" as well for compatibility

3983

if lang_code == f'a-{orig_trans_code}':

3984

process_language(

3985

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3986

# Setting tlang=lang returns damaged subtitles.

3987

process_language(automatic_captions, base_url, trans_code, trans_name,

3988

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3989

3990

info['automatic_captions'] = automatic_captions

3991

info['subtitles'] = subtitles

3992

3993

parsed_url = urllib.parse.urlparse(url)

3994

for component in [parsed_url.fragment, parsed_url.query]:

3995

query = urllib.parse.parse_qs(component)

3996

for k, v in query.items():

3997

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3998

d_k += '_time'

3999

if d_k not in info and k in s_ks:

4000

info[d_k] = parse_duration(query[k][0])

4001

4002

# Youtube Music Auto-generated description

4003

if video_description:

4004

mobj = re.search(

4005

r'''(?xs)

4006

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

4007

(?P<album>[^\n]+)

4008

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

4009

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

4010

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

4011

.+\nAuto-generated\ by\ YouTube\.\s*$

4012

''', video_description)

4013

if mobj:

4014

release_year = mobj.group('release_year')

4015

release_date = mobj.group('release_date')

4016

if release_date:

4017

release_date = release_date.replace('-', '')

4018

if not release_year:

4019

release_year = release_date[:4]

4020

info.update({

4021

'album': mobj.group('album'.strip()),

4022

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

4023

'track': mobj.group('track').strip(),

4024

'release_date': release_date,

4025

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

4031

if not initial_data:

4032

query = {'videoId': video_id}

4033

query.update(self._get_checkok_params())

4034

initial_data = self._extract_response(

4035

item_id=video_id, ep='next', fatal=False,

4036

ytcfg=master_ytcfg, query=query,

4037

headers=self.generate_api_headers(ytcfg=master_ytcfg),

4038

note='Downloading initial data API JSON')

4039

4040

info['comment_count'] = traverse_obj(initial_data, (

4041

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

4042

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

4043

), (

4044

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

4045

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

4046

), expected_type=int_or_none, get_all=False)

4047

4048

try: # This will error if there is no livechat

4049

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

4050

except (KeyError, IndexError, TypeError):

4051

pass

4052

else:

4053

info.setdefault('subtitles', {})['live_chat'] = [{

4054

# url is needed to set cookies

4055

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

4056

'video_id': video_id,

4057

'ext': 'json',

4058

'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')

4059

else 'youtube_live_chat_replay'),

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

4065

or self._extract_chapters_from_engagement_panel(initial_data, duration)

4066

or self._extract_chapters_from_description(video_description, duration)

4067

or None)

4068

4069

contents = traverse_obj(

4070

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

4071

expected_type=list, default=[])

4072

4073

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

4074

if vpir:

4075

stl = vpir.get('superTitleLink')

4076

if stl:

4077

stl = self._get_text(stl)

4078

if try_get(

4079

vpir,

4080

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

4081

info['location'] = stl

4082

else:

4083

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

4084

if mobj:

4085

info.update({

4086

'series': mobj.group(1),

4087

'season_number': int(mobj.group(2)),

4088

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

list) or []):

tbrs = variadic(

traverse_obj(

tlb, 'toggleButtonRenderer',

4097

('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),

4098

default=[]))

4099

for tbr in tbrs:

4100

for getter, regex in [(

4101

lambda x: x['defaultText']['accessibility']['accessibilityData'],

4102

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

4103

lambda x: x['accessibility'],

4104

lambda x: x['accessibilityData']['accessibilityData'],

4105

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

4106

label = (try_get(tbr, getter, dict) or {}).get('label')

4107

if label:

4108

mobj = re.match(regex, label)

4109

if mobj:

4110

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

4111

break

4112

sbr_tooltip = try_get(

4113

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

4114

if sbr_tooltip:

4115

like_count, dislike_count = sbr_tooltip.split(' / ')

4116

info.update({

4117

'like_count': str_to_int(like_count),

4118

'dislike_count': str_to_int(dislike_count),

4119

})

4120

vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))

4121

if vcr:

4122

vc = self._get_count(vcr, 'viewCount')

4123

# Upcoming premieres with waiting count are treated as live here

4124

if vcr.get('isLive'):

4125

info['concurrent_view_count'] = vc

4126

elif info.get('view_count') is None:

4127

info['view_count'] = vc

4128

4129

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

4130

if vsir:

4131

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

4132

info.update({

4133

'channel': self._get_text(vor, 'title'),

4134

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

4139

list) or []

4140

multiple_songs = False

4141

for row in rows:

4142

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

4143

multiple_songs = True

4144

break

4145

for row in rows:

4146

mrr = row.get('metadataRowRenderer') or {}

4147

mrr_title = mrr.get('title')

4148

if not mrr_title:

4149

continue

4150

mrr_title = self._get_text(mrr, 'title')

4151

mrr_contents_text = self._get_text(mrr, ('contents', 0))

4152

if mrr_title == 'License':

4153

info['license'] = mrr_contents_text

4154

elif not multiple_songs:

4155

if mrr_title == 'Album':

4156

info['album'] = mrr_contents_text

4157

elif mrr_title == 'Artist':

4158

info['artist'] = mrr_contents_text

4159

elif mrr_title == 'Song':

4160

info['track'] = mrr_contents_text

4161

4162

fallbacks = {

4163

'channel': 'uploader',

4164

'channel_id': 'uploader_id',

4165

'channel_url': 'uploader_url',

4166

}

4167

4168

# The upload date for scheduled, live and past live streams / premieres in microformats

4169

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

4170

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

4171

upload_date = (

4172

unified_strdate(get_first(microformats, 'uploadDate'))

4173

or unified_strdate(search_meta('uploadDate')))

4174

if not upload_date or (

4175

live_status in ('not_live', None)

4176

and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])

4177

):

4178

upload_date = strftime_or_none(

4179

self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date

4180

info['upload_date'] = upload_date

4181

4182

for to, frm in fallbacks.items():

4183

if not info.get(to):

4184

info[to] = info.get(frm)

4185

4186

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))

4192

4193

is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)

4194

or get_first(video_details, 'isPrivate', expected_type=bool))

4195

4196

info['availability'] = (

4197

'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)

4198

else self._availability(

4199

is_private=is_private,

4200

needs_premium=(

4201

self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)

4202

or False if initial_data and is_private is not None else None),

4203

needs_subscription=(

4204

self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)

4205

or False if initial_data and is_private is not None else None),

4206

needs_auth=info['age_limit'] >= 18,

4207

is_unlisted=None if is_private is None else (

4208

self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)

4209

or get_first(microformats, 'isUnlisted', expected_type=bool))))

4210

4211

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

4212

4213

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

4219

4220

@staticmethod
def passthrough_smuggled_data(func):
    """Decorator for extraction methods of the form ``func(self, url, smuggled_data)``.

    The returned wrapper is called as ``wrapper(self, url)``. It unsmuggles
    the URL, marks music URLs in the smuggled data, calls ``func`` and, when
    there is smuggled data, smuggles it back into the URL of every entry of
    the returned info dict.
    """

    def _with_smuggled_urls(entries, smuggled_data):
        # Generator, so the wrapped entries are still produced lazily
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], smuggled_data)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        # Entries are only looked up when there is something to pass through
        entries = result.get('entries') if smuggled_data else None
        if entries:
            result['entries'] = _with_smuggled_urls(entries, smuggled_data)
        return result

    return wrapper

def _extract_channel_id(self, webpage):

4241

channel_id = self._html_search_meta(

4242

'channelId', webpage, 'channel id', default=None)

4243

if channel_id:

4244

return channel_id

4245

channel_url = self._html_search_meta(

4246

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

4247

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

4248

'twitter:app:url:googleplay'), webpage, 'channel url')

4249

return self._search_regex(

4250

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

4251

channel_url, 'channel id')

4252

4253

@staticmethod

4254

def _extract_basic_item_renderer(item):

4255

# Modified from _extract_grid_item_renderer

4256

known_basic_renderers = (

4257

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

4258

)

4259

for key, renderer in item.items():

4260

if not isinstance(renderer, dict):

4261

continue

4262

elif key in known_basic_renderers:

4263

return renderer

4264

elif key.startswith('grid') and key.endswith('Renderer'):

4265

return renderer

4266

4267

def _grid_entries(self, grid_renderer):
    """Yield one result per recognised item of ``grid_renderer['items']``.

    An item is treated as a playlist, video or channel (checked in that
    order) depending on which ID its renderer carries. Items without any of
    these IDs fall back to their navigation endpoint URL.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            yield self._extract_video(renderer)
        elif channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str))
            if not endpoint_url:
                continue
            # The first extractor that accepts the URL handles it
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(endpoint_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    endpoint_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(endpoint_url), video_title=title)

4306

4307

def _music_reponsive_list_entry(self, renderer):
    """Return a music.youtube.com URL result for a music list item.

    Tried in order: the item's own video, the playlist (optionally with a
    video) of its watch endpoint, then its browse endpoint. Returns None
    when none of them is present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)

4324

4325

def _shelf_entries_from_content(self, shelf_renderer):

4326

content = shelf_renderer.get('content')

4327

if not isinstance(content, dict):

4328

return

4329

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

4330

if renderer:

4331

# TODO: add support for nested playlists so each shelf is processed

4332

# as separate playlist

4333

# TODO: this includes only first N items

4334

yield from self._grid_entries(renderer)

4335

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of a shelf.

    When the shelf has an endpoint URL, a URL result for it is yielded
    first. The entries found in the shelf content follow. With
    ``skip_channels`` set, a shelf whose URL contains ``/channels?`` yields
    nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # The shelf may not have a URL (or not all its entries are behind it),
    # so the content is always extracted as well
    yield from self._shelf_entries_from_content(shelf_renderer)

4355

4356

def _playlist_entries(self, video_list_renderer):

4357

for content in video_list_renderer['contents']:

4358

if not isinstance(content, dict):

4359

continue

4360

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

4361

if not isinstance(renderer, dict):

4362

continue

4363

video_id = renderer.get('videoId')

4364

if not video_id:

4365

continue

4366

yield self._extract_video(renderer)

4367

4368

def _rich_entries(self, rich_grid_renderer):
    """Yield the video of a rich item, taken from its video or reel item renderer."""
    video_renderer = traverse_obj(
        rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4375

4376

def _video_entry(self, video_renderer):

4377

video_id = video_renderer.get('videoId')

4378

if video_id:

4379

return self._extract_video(video_renderer)

4380

4381

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for the page a hashtag tile links to, or None without a URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4387

4388

def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a community post.

    Covers the attached video, the attached playlist and every YouTube video
    linked in the post text (except a link to the attached video itself).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_video_id = YoutubeIE._match_id(link_url)
        # The attached video was already yielded above
        if linked_video_id != video_id:
            yield self.url_result(link_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)

4423

4424

def _post_thread_continuation_entries(self, post_thread_continuation):

4425

contents = post_thread_continuation.get('contents')

4426

if not isinstance(contents, list):

4427

return

4428

for content in contents:

4429

renderer = content.get('backstagePostThreadRenderer')

4430

if isinstance(renderer, dict):

4431

yield from self._post_thread_entries(renderer)

4432

continue

4433

renderer = content.get('videoRenderer')

4434

if isinstance(renderer, dict):

4435

yield self._video_entry(renderer)

4436

4437

r''' # unused

4438

def _rich_grid_entries(self, contents):

4439

for content in contents:

4440

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4441

if video_renderer:

4442

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _report_history_entries(self, renderer):
    """Yield a video URL result for every link found in a report history table."""
    url_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    # default=[] keeps the loop safe when nothing matches, as the other
    # branching traverse_obj loops in this file do
    for relative_url in traverse_obj(renderer, url_path, default=[]):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)

4453

4454

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries under ``parent_renderer['contents']``.

    ``continuation_list`` is an output parameter: it is reset to ``[None]``
    and its single slot is then filled in place with the continuation found
    while extracting.
    """
    continuation_list[:] = [None]

    # Maps a renderer key to the callable producing its entries
    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)

        if not section:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first recognised renderer of an item is processed
            match = next(
                ((key, value) for key, value in section_item.items() if key in handlers), None)
            if match is None:
                continue
            key, renderer = match
            for entry in handlers[key](renderer):
                if entry:
                    yield entry
            continuation_list[0] = self._extract_continuation(renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4506

4507

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of ``tab``, following continuations page by page.

    The entries embedded in the tab content come first. After that, each
    continuation is requested through ``_extract_response`` until there is
    no continuation left, no response, or a response whose first item is
    not handled by a known renderer.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Looks up `continuation_list` at call time, so it also sees the
        # fresh list bound before each continuation page below
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Maps the key of the first continuation item to (extractor, key under
    # which the extractor expects the items; None = pass them as they are)
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        first_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in first_item:
            handler = known_renderers.get(key)
            if handler is None:
                continue
            func, parent_key = handler
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        if not video_items_renderer:
            break

@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the renderer of the tab marked as selected.

    When no tab is selected, raise ExtractorError if ``fatal`` is set and
    return None otherwise.
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (renderer for renderer in renderers if renderer.get('selected') is True), None)
    if selected is None and fatal:
        raise ExtractorError('Unable to find selected tab')
    return selected

4579

4580

def _extract_uploader(self, data):
    """Return the uploader fields read from the secondary playlist sidebar.

    The result holds ``uploader``, ``uploader_id`` and ``uploader_url`` as
    far as they are available, passed through ``filter_dict``.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)

    uploader = {}
    if owner:
        def browse_endpoint_field(field):
            return try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint'][field], str)

        owner_text = owner.get('text')
        uploader.update({
            # Collaborative playlists read "by <name> and N others"
            'uploader': self._search_regex(
                r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'uploader_id': browse_endpoint_field('browseId'),
            'uploader_url': urljoin(
                'https://www.youtube.com/', browse_endpoint_field('canonicalBaseUrl')),
        })
    return filter_dict(uploader)

4595

4596

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Return the playlist result for a tabbed page.

    The metadata is read from ``data`` (channel or playlist metadata
    renderer, sidebar and header), the entries from the selected tab of
    ``tabs``.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name, channel_url, channel_id = (
            renderer.get(key) for key in ('title', 'channelUrl', 'externalId'))
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        tags = renderer.get('keywords', '').split()

    def _add_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _add_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _add_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    # channel_id is only set when a channelMetadataRenderer was found
    playlist_id = item_id if channel_id is None else channel_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix = self._parse_time_text(self._get_text(playlist_stats, 2))
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly just updated) uploader fields
    for field in ('', '_id', '_url'):
        metadata[f'channel{field}'] = metadata[f'uploader{field}']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4687

4688

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline (watch page) playlist, page by page.

    Every iteration lists the entries of the current playlist renderer, yields
    the ones that follow the last entry already yielded, and then asks the
    'next' endpoint for the following page. The generator stops when a page
    has no entries, when a page has nothing after the last yielded entry, or
    when the response carries no playlist renderer.

    @param playlist: playlist renderer handed to _playlist_entries
    @param playlist_id: sent as 'playlistId' and used in the progress note
    @param data: initial response; used for the account syncid and visitor data
    @param ytcfg: ytcfg used when generating the API headers
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last entry already yielded; when that entry
        # is not on this page the default -1 makes the whole page new
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last item may lack a watchEndpoint (try_get then gives None);
        # fall back to an empty dict so the defaults below are used instead
        # of raising AttributeError on `.get`
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No playlist renderer in the response: nothing more to list.
            # NOTE(review): presumably _playlist_entries expects a dict - confirm
            return

4718

4719

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist renderer found in a response.

    Returns a url_result pointing at the playlist's own page (handled by
    YoutubeTabIE) when the renderer links to a different URL and the playlist
    id is not a known-unviewable one; otherwise returns a playlist_result
    built from the inline playlist entries.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    linked_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    tab_url = urljoin(url, linked_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    extract_inline = not tab_url or tab_url == url or unviewable
    if extract_inline:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)

4741

4742

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public

    Three sources are consulted: the badges of the primary sidebar renderer,
    the 'privacy' field of the playlist header, and the selected entry of the
    privacy dropdown (shown for personal playlists when authenticated).

    @param data: response
    @returns 'public' when any source says so, otherwise the result of
             self._availability() for the collected flags
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}

    player_header_privacy = traverse_obj(
        data, ('header', 'playlistHeaderRenderer', 'privacy'), expected_type=str)

    badges = self._extract_badges(renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = traverse_obj(
        renderer, (
            'privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
            lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        get_all=False, expected_type=str)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def privacy_flag(badge_type, header_value, icon_value):
        # A matching badge is decisive on its own. Previously the badge check
        # sat inside `badge or header == X if header is not None else ...`,
        # where the conditional expression binds looser than `or`, so the
        # badge was silently ignored whenever the header privacy was missing.
        if self._has_badge(badges, badge_type):
            return True
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        # No source said anything about this flag
        return None

    return self._availability(
        is_private=privacy_flag(BadgeType.AVAILABILITY_PRIVATE, 'PRIVATE', 'PRIVACY_PRIVATE'),
        is_unlisted=privacy_flag(BadgeType.AVAILABILITY_UNLISTED, 'UNLISTED', 'PRIVACY_UNLISTED'),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value found among the items of
    data['sidebar']['playlistSidebarRenderer'], or None when there is none.

    @param data: response
    @param info_renderer: key looked up in each sidebar item
    @param expected_type: type the value must have to be accepted by try_get
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    # Lazy generator: items after the first match are never inspected
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when the response has no primary sidebar renderer. Otherwise
    the browse endpoint of the menu entry labelled 'show unavailable videos'
    (case-insensitive) supplies 'browseId' and 'params'; built-in defaults are
    used for whichever of them is missing. The request is non-fatal.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []

    # Stays empty unless the matching menu entry is found
    browse_endpoint = {}
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_renderer = entry.get('menuNavigationItemRenderer')
        label = try_get(nav_renderer, lambda x: x['text']['simpleText'], str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4824

4825

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE (cached per instance)"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped

4828

4829

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying via RetryManager.

    A download/extraction failure caused by a network error is retried unless
    it is an HTTP 403 or 429; any other failure, and any alert raised by
    _extract_and_report_alerts, is passed to _error_or_warning and ends the
    attempts. Initial data lacking all of 'contents', 'currentVideoEndpoint'
    and 'onResponseReceivedActions' is treated as incomplete and retried.

    @returns (webpage, data) from the last attempt; either may be None
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            if isinstance(cause, network_exceptions) and not (
                    isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429)):
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_payload = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_payload:
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):

4858

"""Use if failed to extract ytcfg (and data) from initial webpage"""

4859

if not ytcfg and self.is_authenticated:

4860

msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'

4861

if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:

4862

raise ExtractorError(

4863

f'{msg}. If you are not downloading private content, or '

4864

'your cookies are only for the first account and channel,'

4865

' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',

4866

expected=True)

4867

self.report_warning(msg, only_once=True)

4868

4869

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the response data and ytcfg for a tab/playlist URL.

    Unless the webpage is skipped, the webpage is downloaded first and ytcfg
    is taken from it when none was passed in. If no data came out of the
    webpage, the data is fetched through _extract_tab_endpoint instead.

    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        # NOTE(review): when `fatal` is false only a warning is issued and `data` is kept
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4888

4889

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API and fetch the endpoint it maps to.

    A resolved 'browseEndpoint' is requested from the 'browse' API and a
    'watchEndpoint' from the 'next' API, checked in that order. When neither
    is present an ExtractorError is raised if `fatal`, else a warning is
    reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    failure = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(failure, expected=True)
    self.report_warning(failure, item_id)

4906

4907

# Value _search_results uses for `params` when the caller does not pass one;
# a falsy value means no 'params' key is added to the search query
_SEARCH_PARAMS = None

4908

4909

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, requesting the 'search' endpoint page by page.

    @param query: search text, sent as 'query'
    @param params: value for the 'params' key; defaults to self._SEARCH_PARAMS
                   and is only sent when truthy
    @param default_client: client used for the ytcfg download, headers and requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Alternative locations of the result list inside a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})

    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-element list that _extract_entries is handed to report the next continuation
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        continuation = continuation_list[0] or {}
        data.update(continuation)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        page = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page, continuation_list)
        if not continuation_list[0]:
            break

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4944

IE_DESC = 'YouTube Tabs'

4945

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4954

(?P<not_channel>

4955

feed/|hashtag/|

4956

(?:playlist|watch)\?.*?\blist=

4957

)|

4958

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4963

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4964

}

4965

IE_NAME = 'youtube:tab'

4966

4967

_TESTS = [{

4968

'note': 'playlists, multipage',

4969

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4970

'playlist_mincount': 94,

4971

'info_dict': {

4972

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4973

'title': 'Igor Kleiner - Playlists',

4974

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4975

'uploader': 'Igor Kleiner',

4976

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4977

'channel': 'Igor Kleiner',

4978

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4979

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4980

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4981

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4982

'channel_follower_count': int

4983

},

4984

}, {

4985

'note': 'playlists, multipage, different order',

4986

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4987

'playlist_mincount': 94,

4988

'info_dict': {

4989

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4990

'title': 'Igor Kleiner - Playlists',

4991

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4992

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4993

'uploader': 'Igor Kleiner',

4994

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4995

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4996

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4997

'channel': 'Igor Kleiner',

4998

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4999

'channel_follower_count': int

5000

},

5001

}, {

5002

'note': 'playlists, series',

5003

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

5004

'playlist_mincount': 5,

5005

'info_dict': {

5006

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5007

'title': '3Blue1Brown - Playlists',

5008

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5009

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5010

'uploader': '3Blue1Brown',

5011

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5012

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5013

'channel': '3Blue1Brown',

5014

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5015

'tags': ['Mathematics'],

5016

'channel_follower_count': int

5017

},

5018

}, {

5019

'note': 'playlists, singlepage',

5020

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

5021

'playlist_mincount': 4,

5022

'info_dict': {

5023

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5024

'title': 'ThirstForScience - Playlists',

5025

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

5026

'uploader': 'ThirstForScience',

5027

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5028

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5029

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5030

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5031

'tags': 'count:13',

5032

'channel': 'ThirstForScience',

5033

'channel_follower_count': int

5034

}

5035

}, {

5036

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

5037

'only_matching': True,

5038

}, {

5039

'note': 'basic, single video playlist',

5040

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5041

'info_dict': {

5042

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5043

'uploader': 'Sergey M.',

5044

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5045

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

5050

'channel': 'Sergey M.',

5051

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5052

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5053

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5054

'availability': 'public',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

5059

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5060

'info_dict': {

5061

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5062

'uploader': 'Sergey M.',

5063

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5064

'title': 'youtube-dl empty playlist',

5065

'tags': [],

5066

'channel': 'Sergey M.',

5067

'description': '',

5068

'modified_date': '20160902',

5069

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5070

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5071

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5072

'availability': 'public',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

5078

'info_dict': {

5079

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5080

'title': 'lex will - Home',

5081

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5082

'uploader': 'lex will',

5083

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5084

'channel': 'lex will',

5085

'tags': ['bible', 'history', 'prophesy'],

5086

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5087

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5088

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5089

'channel_follower_count': int

5090

},

5091

'playlist_mincount': 2,

5092

}, {

5093

'note': 'Videos tab',

5094

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

5095

'info_dict': {

5096

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5097

'title': 'lex will - Videos',

5098

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5099

'uploader': 'lex will',

5100

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5101

'tags': ['bible', 'history', 'prophesy'],

5102

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5103

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5104

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5105

'channel': 'lex will',

5106

'channel_follower_count': int

5107

},

5108

'playlist_mincount': 975,

5109

}, {

5110

'note': 'Videos tab, sorted by popular',

5111

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

5112

'info_dict': {

5113

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5114

'title': 'lex will - Videos',

5115

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5116

'uploader': 'lex will',

5117

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5118

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5119

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5120

'channel': 'lex will',

5121

'tags': ['bible', 'history', 'prophesy'],

5122

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5123

'channel_follower_count': int

5124

},

5125

'playlist_mincount': 199,

5126

}, {

5127

'note': 'Playlists tab',

5128

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

5129

'info_dict': {

5130

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5131

'title': 'lex will - Playlists',

5132

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5133

'uploader': 'lex will',

5134

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5135

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5136

'channel': 'lex will',

5137

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5138

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5139

'tags': ['bible', 'history', 'prophesy'],

5140

'channel_follower_count': int

5141

},

5142

'playlist_mincount': 17,

5143

}, {

5144

'note': 'Community tab',

5145

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

5146

'info_dict': {

5147

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5148

'title': 'lex will - Community',

5149

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5150

'uploader': 'lex will',

5151

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5152

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5153

'channel': 'lex will',

5154

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5155

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5156

'tags': ['bible', 'history', 'prophesy'],

5157

'channel_follower_count': int

5158

},

5159

'playlist_mincount': 18,

5160

}, {

5161

'note': 'Channels tab',

5162

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

5163

'info_dict': {

5164

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5165

'title': 'lex will - Channels',

5166

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5167

'uploader': 'lex will',

5168

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5169

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5170

'channel': 'lex will',

5171

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5172

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5173

'tags': ['bible', 'history', 'prophesy'],

5174

'channel_follower_count': int

5175

},

5176

'playlist_mincount': 12,

5177

}, {

5178

'note': 'Search tab',

5179

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

5180

'playlist_mincount': 40,

5181

'info_dict': {

5182

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5183

'title': '3Blue1Brown - Search - linear algebra',

5184

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5185

'uploader': '3Blue1Brown',

5186

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5187

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5188

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5189

'tags': ['Mathematics'],

5190

'channel': '3Blue1Brown',

5191

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5192

'channel_follower_count': int

5193

},

5194

}, {

5195

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5196

'only_matching': True,

5197

}, {

5198

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5199

'only_matching': True,

5200

}, {

5201

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5202

'only_matching': True,

5203

}, {

5204

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

5205

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5206

'info_dict': {

5207

'title': '29C3: Not my department',

5208

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5209

'uploader': 'Christiaan008',

5210

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5211

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

5212

'tags': [],

5213

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5214

'view_count': int,

5215

'modified_date': '20150605',

5216

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5217

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5218

'channel': 'Christiaan008',

5219

'availability': 'public',

5220

},

5221

'playlist_count': 96,

5222

}, {

5223

'note': 'Large playlist',

5224

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

5225

'info_dict': {

5226

'title': 'Uploads from Cauchemar',

5227

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

5228

'uploader': 'Cauchemar',

5229

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5230

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

5231

'tags': [],

5232

'modified_date': r're:\d{8}',

5233

'channel': 'Cauchemar',

5234

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

5235

'view_count': int,

5236

'description': '',

5237

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5238

'availability': 'public',

5239

},

5240

'playlist_mincount': 1123,

5241

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5242

}, {

5243

'note': 'even larger playlist, 8832 videos',

5244

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

5245

'only_matching': True,

5246

}, {

5247

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

5248

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

5249

'info_dict': {

5250

'title': 'Uploads from Interstellar Movie',

5251

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

5252

'uploader': 'Interstellar Movie',

5253

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5254

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

5255

'tags': [],

5256

'view_count': int,

5257

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5258

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

5259

'channel': 'Interstellar Movie',

5260

'description': '',

5261

'modified_date': r're:\d{8}',

5262

'availability': 'public',

5263

},

5264

'playlist_mincount': 21,

5265

}, {

5266

'note': 'Playlist with "show unavailable videos" button',

5267

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

5268

'info_dict': {

5269

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

5270

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

5271

'uploader': 'Phim Siêu Nhân Nhật Bản',

5272

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5273

'view_count': int,

5274

'channel': 'Phim Siêu Nhân Nhật Bản',

5275

'tags': [],

5276

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5277

'description': '',

5278

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5279

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5280

'modified_date': r're:\d{8}',

5281

'availability': 'public',

5282

},

5283

'playlist_mincount': 200,

5284

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5285

}, {

5286

'note': 'Playlist with unavailable videos in page 7',

5287

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

5288

'info_dict': {

5289

'title': 'Uploads from BlankTV',

5290

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

5291

'uploader': 'BlankTV',

5292

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5293

'channel': 'BlankTV',

5294

'channel_url': 'https://www.youtube.com/c/blanktv',

5295

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5296

'view_count': int,

5297

'tags': [],

5298

'uploader_url': 'https://www.youtube.com/c/blanktv',

5299

'modified_date': r're:\d{8}',

5300

'description': '',

5301

'availability': 'public',

5302

},

5303

'playlist_mincount': 1000,

5304

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5305

}, {

5306

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

5307

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5308

'info_dict': {

5309

'title': 'Data Analysis with Dr Mike Pound',

5310

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5311

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5312

'uploader': 'Computerphile',

5313

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

5314

'uploader_url': 'https://www.youtube.com/user/Computerphile',

5315

'tags': [],

5316

'view_count': int,

5317

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5318

'channel_url': 'https://www.youtube.com/user/Computerphile',

5319

'channel': 'Computerphile',

5320

'availability': 'public',

5321

},

5322

'playlist_mincount': 11,

5323

}, {

5324

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5325

'only_matching': True,

5326

}, {

5327

'note': 'Playlist URL that does not actually serve a playlist',

5328

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

5333

'uploader': 'STREEM',

5334

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

5335

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

5336

'upload_date': '20150526',

5337

'license': 'Standard YouTube License',

5338

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

5339

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5346

},

5347

'skip': 'This video is not available.',

5348

'add_ie': [YoutubeIE.ie_key()],

5349

}, {

5350

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5351

'only_matching': True,

5352

}, {

5353

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5354

'only_matching': True,

5355

}, {

5356

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5357

'info_dict': {

5358

'id': 'Wq15eF5vCbI', # This will keep changing

5359

'ext': 'mp4',

5360

'title': str,

5361

'uploader': 'Sky News',

5362

'uploader_id': 'skynews',

5363

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5364

'upload_date': r're:\d{8}',

5365

'description': str,

5366

'categories': ['News & Politics'],

5367

'tags': list,

5368

'like_count': int,

5369

'release_timestamp': 1642502819,

5370

'channel': 'Sky News',

5371

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5372

'age_limit': 0,

5373

'view_count': int,

5374

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

5375

'playable_in_embed': True,

5376

'release_date': '20220118',

5377

'availability': 'public',

5378

'live_status': 'is_live',

5379

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5380

'channel_follower_count': int

5381

},

5382

'params': {

5383

'skip_download': True,

5384

},

5385

'expected_warnings': ['Ignoring subtitle tracks found in '],

5386

}, {

5387

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5392

'uploader': 'The Young Turks',

5393

'uploader_id': 'TheYoungTurks',

5394

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5395

'upload_date': '20150715',

5396

'license': 'Standard YouTube License',

5397

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5398

'categories': ['News & Politics'],

5399

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5404

},

5405

'only_matching': True,

5406

}, {

5407

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5408

'only_matching': True,

5409

}, {

5410

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5411

'only_matching': True,

5412

}, {

5413

'note': 'A channel that is not live. Should raise error',

5414

'url': 'https://www.youtube.com/user/numberphile/live',

5415

'only_matching': True,

5416

}, {

5417

'url': 'https://www.youtube.com/feed/trending',

5418

'only_matching': True,

5419

}, {

5420

'url': 'https://www.youtube.com/feed/library',

5421

'only_matching': True,

5422

}, {

5423

'url': 'https://www.youtube.com/feed/history',

5424

'only_matching': True,

5425

}, {

5426

'url': 'https://www.youtube.com/feed/subscriptions',

5427

'only_matching': True,

5428

}, {

5429

'url': 'https://www.youtube.com/feed/watch_later',

5430

'only_matching': True,

5431

}, {

5432

'note': 'Recommended - redirects to home page.',

5433

'url': 'https://www.youtube.com/feed/recommended',

5434

'only_matching': True,

5435

}, {

5436

'note': 'inline playlist with not always working continuations',

5437

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5438

'only_matching': True,

5439

}, {

5440

'url': 'https://www.youtube.com/course',

5441

'only_matching': True,

5442

}, {

5443

'url': 'https://www.youtube.com/zsecurity',

5444

'only_matching': True,

5445

}, {

5446

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5447

'only_matching': True,

5448

}, {

5449

'url': 'https://www.youtube.com/TheYoungTurks/live',

5450

'only_matching': True,

5451

}, {

5452

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5459

}, {

5460

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5461

'only_matching': True,

5462

}, {

5463

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5464

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5465

'only_matching': True

5466

}, {

5467

'note': '/browse/ should redirect to /channel/',

5468

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5469

'only_matching': True

5470

}, {

5471

'note': 'VLPL, should redirect to playlist?list=PL...',

5472

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5473

'info_dict': {

5474

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5475

'uploader': 'NoCopyrightSounds',

5476

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5477

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5478

'title': 'NCS : All Releases 💿',

5479

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5480

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5481

'modified_date': r're:\d{8}',

5482

'view_count': int,

5483

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5484

'tags': [],

5485

'channel': 'NoCopyrightSounds',

5486

'availability': 'public',

5487

},

5488

'playlist_mincount': 166,

5489

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5490

}, {

5491

'note': 'Topic, should redirect to playlist?list=UU...',

5492

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5493

'info_dict': {

5494

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5495

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5496

'title': 'Uploads from Royalty Free Music - Topic',

5497

'uploader': 'Royalty Free Music - Topic',

5498

'tags': [],

5499

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5500

'channel': 'Royalty Free Music - Topic',

5501

'view_count': int,

5502

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5503

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5504

'modified_date': r're:\d{8}',

5505

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5506

'description': '',

5507

'availability': 'public',

5508

},

5509

'expected_warnings': [

5510

'The URL does not have a videos tab',

5511

r'[Uu]navailable videos (are|will be) hidden',

5512

],

5513

'playlist_mincount': 101,

5514

}, {

5515

'note': 'Topic without a UU playlist',

5516

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5517

'info_dict': {

5518

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5519

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5520

'tags': [],

5521

},

5522

'expected_warnings': [

5523

'the playlist redirect gave error',

5524

],

5525

'playlist_mincount': 9,

5526

}, {

5527

'note': 'Youtube music Album',

5528

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5529

'info_dict': {

5530

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5531

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5536

'modified_date': r're:\d{8}',

5537

},

5538

'playlist_count': 50,

5539

}, {

5540

'note': 'unlisted single video playlist',

5541

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5542

'info_dict': {

5543

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5544

'uploader': 'colethedj',

5545

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5546

'title': 'yt-dlp unlisted playlist test',

5547

'availability': 'unlisted',

5548

'tags': [],

5549

'modified_date': '20220418',

5550

'channel': 'colethedj',

5551

'view_count': int,

5552

'description': '',

5553

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5554

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5555

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5560

'url': 'https://www.youtube.com/feed/recommended',

5561

'info_dict': {

5562

'id': 'recommended',

5563

'title': 'recommended',

5564

'tags': [],

5565

},

5566

'playlist_mincount': 50,

5567

'params': {

5568

'skip_download': True,

5569

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5570

},

5571

}, {

5572

'note': 'API Fallback: /videos tab, sorted by oldest first',

5573

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5574

'info_dict': {

5575

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5576

'title': 'Cody\'sLab - Videos',

5577

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5578

'uploader': 'Cody\'sLab',

5579

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5580

'channel': 'Cody\'sLab',

5581

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5582

'tags': [],

5583

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5584

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5585

'channel_follower_count': int

5586

},

5587

'playlist_mincount': 650,

5588

'params': {

5589

'skip_download': True,

5590

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5591

},

5592

}, {

5593

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5594

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5595

'info_dict': {

5596

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5597

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5598

'title': 'Uploads from Royalty Free Music - Topic',

5599

'uploader': 'Royalty Free Music - Topic',

5600

'modified_date': r're:\d{8}',

5601

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5602

'description': '',

5603

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5604

'tags': [],

5605

'channel': 'Royalty Free Music - Topic',

5606

'view_count': int,

5607

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5608

'availability': 'public',

5609

},

5610

'expected_warnings': [

5611

'does not have a videos tab',

5612

r'[Uu]navailable videos (are|will be) hidden',

5613

],

5614

'playlist_mincount': 101,

5615

'params': {

5616

'skip_download': True,

5617

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5618

},

5619

}, {

5620

'note': 'non-standard redirect to regional channel',

5621

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5622

'only_matching': True

5623

}, {

5624

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5625

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5626

'info_dict': {

5627

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5628

'modified_date': '20220407',

5629

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5630

'tags': [],

5631

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5632

'uploader': 'pukkandan',

5633

'availability': 'unlisted',

5634

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5635

'channel': 'pukkandan',

5636

'description': 'Test for collaborative playlist',

5637

'title': 'yt-dlp test - collaborative playlist',

5638

'view_count': int,

5639

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5640

},

5641

'playlist_mincount': 2

5642

}, {

5643

'note': 'translated tab name',

5644

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',

5645

'info_dict': {

5646

'id': 'UCiu-3thuViMebBjw_5nWYrA',

5647

'tags': [],

5648

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5649

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5650

'description': '',

5651

'title': 'cole-dlp-test-acc - 再生リスト',

5652

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5653

'uploader': 'cole-dlp-test-acc',

5654

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5655

'channel': 'cole-dlp-test-acc',

5656

},

5657

'playlist_mincount': 1,

5658

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5659

'expected_warnings': ['Preferring "ja"'],

5660

}, {

5661

# XXX: this should really check flat playlist entries, but the test suite doesn't support that

5662

'note': 'preferred lang set with playlist with translated video titles',

5663

'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5664

'info_dict': {

5665

'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5666

'tags': [],

5667

'view_count': int,

5668

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5669

'uploader': 'cole-dlp-test-acc',

5670

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5671

'channel': 'cole-dlp-test-acc',

5672

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5673

'description': 'test',

5674

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5675

'title': 'dlp test playlist',

5676

'availability': 'public',

5677

},

5678

'playlist_mincount': 1,

5679

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5680

'expected_warnings': ['Preferring "ja"'],

5681

}, {

5682

# shorts audio pivot for 2GtVksBMYFM.

5683

'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',

5684

'info_dict': {

5685

'id': 'sfv_audio_pivot',

5686

'title': 'sfv_audio_pivot',

5687

'tags': [],

5688

},

5689

'playlist_mincount': 50,

}]

@classmethod
def suitable(cls, url):
    """Report whether this extractor should handle ``url``.

    Any URL accepted by ``YoutubeIE`` is rejected here; for every other URL
    the decision is delegated to the parent class's ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5696

5697

# Splits a URL into named groups: `pre` (the part matched by _VALID_URL),
# an optional `tab` ("/" followed by word characters) and `post` (the rest).
# The conditional `(?(not_channel)|...)` only attempts to match `tab` when the
# `not_channel` group did not take part in the match.
# NOTE(review): `not_channel` is presumably a named group inside _VALID_URL - confirm.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5698

5699

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a tab-style URL (channel tab, playlist, feed, watch URL with a list).

    The URL is forced onto the ``www.youtube.com`` host and split with
    ``_URL_RE``. Depending on what the page data contains, the result comes
    from ``_extract_from_tabs``, ``_extract_from_playlist``, or a
    ``url_result`` that hands over to another URL/extractor.

    :param url: the URL to extract.
    :param smuggled_data: mapping of smuggled options; only the
        ``'is_music_url'`` key is read here.
    :raises ExtractorError: when an album cannot be resolved to a playlist,
        when an explicitly requested tab does not exist, or when the page
        cannot be recognised.
    :raises UserNotLive: when the ``live`` tab was requested but the tab page
        was returned instead of a video.
    """
    item_id = self._match_id(url)
    # Replace only the host; every other component of the URL is kept
    parsed_url = urllib.parse.urlparse(url)
    url = urllib.parse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def split_url(target):
        # Groups that did not participate in the match become '' so that
        # string methods can be used on them unconditionally
        groups = self._URL_RE.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    url_parts = split_url(url)
    redirect_warning = None
    pre = url_parts['pre']
    # Youtube returns incomplete data if tabname is not lower case
    tab = url_parts['tab'].lower()
    post = url_parts['post']
    is_channel = not url_parts['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            album_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if not album_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(album_url, ie=YoutubeTabIE.ie_key())
        elif url_parts['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = pre + tab + post
    url_parts = split_url(url)

    # Handle both video/playlist URLs
    query = parse_qs(url)
    video_id = query.get('v', [None])[0]
    playlist_id = query.get('list', [None])[0]

    if not video_id and url_parts['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        url_parts = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_path = (
        'onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
        'commandMetadata', 'webCommandMetadata', 'url')
    redirect_url = traverse_obj(data, redirect_path, get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), url_parts['tab'], url_parts['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_url = urljoin(
            url, traverse_obj(selected_tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
        translated_tab_name = selected_tab.get('title', '').lower()

        # Prefer tab name from tab url as it is always in en,
        # but only when preferred lang is set as it may not extract reliably in all cases.
        selected_tab_name = None
        if self._preferred_lang in (None, 'en'):
            selected_tab_name = translated_tab_name
        if not selected_tab_name and selected_tab_url:
            selected_tab_name = split_url(selected_tab_url)['tab'][1:]
        if not selected_tab_name:
            selected_tab_name = translated_tab_name

        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = url_parts['tab'][1:]

        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise UserNotLive(video_id=url_parts['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab came from the URL itself, so a missing tab is an error
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    uploads_id = f'UU{item_id[2:]}'
                    uploads_url = f'https://www.youtube.com/playlist?list={uploads_id}'
                    try:
                        data, ytcfg = self._extract_data(
                            uploads_url, uploads_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = uploads_id, uploads_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {uploads_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so the tabs are looked up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if url_parts['tab'] != '/live':
        # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return self.url_result(
        f'https://www.youtube.com/watch?v={video_id}',
        ie=YoutubeIE.ie_key(), video_id=video_id)

5837

5838

5839

class YoutubePlaylistIE(InfoExtractor):
    """Extractor for bare playlist IDs and URLs carrying a ``list=`` parameter.

    It performs no extraction itself: ``_real_extract`` rewrites the input to
    a ``https://www.youtube.com/playlist`` URL and delegates to ``YoutubeTabIE``.
    """
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether ``url`` should be handled by this extractor.

        URLs claimed by ``YoutubeTabIE`` and URLs with a non-empty ``v`` query
        parameter are rejected; otherwise the parent class's ``suitable``
        decides.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # `parse_qs` comes from the module-level `..utils` import
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Redirect to the canonical playlist URL handled by ``YoutubeTabIE``.

        The original query string is carried over; if there is none (e.g. a
        bare playlist ID was given) ``list=<playlist id>`` is used instead.
        Music URLs are flagged by smuggling ``is_music_url`` into the result URL.
        """
        playlist_id = self._match_id(url)
        # Must be determined before `url` is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5950

5951

5952

class YoutubeYtBeIE(InfoExtractor):
    """Extractor for ``youtu.be`` short links that also carry a ``list=`` parameter.

    The link is rewritten to a ``https://www.youtube.com/watch`` URL with both
    the video and playlist IDs and handed to ``YoutubeTabIE``.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL and delegate it to ``YoutubeTabIE``."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6001

6002

6003

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Extractor for ``/embed/live_stream?channel=...`` URLs.

    The channel ID is taken from the ``channel`` query parameter and the
    channel's ``/live`` URL is handed to ``YoutubeTabIE``.
    """
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the ``/live`` URL of the embedded channel."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

6016

6017

6018

class YoutubeYtUserIE(InfoExtractor):
    """Extractor for the ``ytuser:<name>`` pseudo-URL.

    Everything after the ``ytuser:`` prefix is used as the user name, and the
    user's ``/videos`` URL is handed to ``YoutubeTabIE``.
    """
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the ``/videos`` URL of the named user."""
        user_id = self._match_id(url)
        # f-string for consistency with the sibling extractors in this file
        return self.url_result(
            f'https://www.youtube.com/user/{user_id}/videos',
            ie=YoutubeTabIE.ie_key(), video_id=user_id)

6032

6033

6034

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the ``:ytfav`` keyword (and the spelling variants in ``_VALID_URL``).

    Delegates to ``YoutubeTabIE`` with the ``list=LL`` playlist URL.
    """
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the ``LL`` playlist; the keyword itself carries no data."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())

6051

6052

6053

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Extractor for the ``:ytnotif`` keyword.

    Pages through the ``notification/get_notification_menu`` endpoint and
    yields one ``url`` entry per notification that points at a video or a
    community post.
    """
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield the entries of one response page.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is reset to ``None`` and then set to the last
        ``continuationItemRenderer`` found among the items, if any.
        """
        first_page_items = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0,
            'multiPageMenuNotificationSectionRenderer', 'items')
        next_page_items = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, first_page_items, next_page_items, expected_type=list)
        if not items:
            items = []

        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            continuation = item.get('continuationItemRenderer')
            if continuation:
                continuation_list[0] = continuation

    def _extract_notification_renderer(self, notification):
        """Convert one notification renderer into a ``url`` result dict.

        Returns ``None`` when the notification has neither a video ID nor a
        browse endpoint providing both a channel ID and a ``/post/`` ID.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            channel_id = None
            target_ie = YoutubeIE
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            target_ie = YoutubeTabIE
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The date is only derived when the `approximate_date` extractor arg is set
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            sent_time = self._parse_time_text(self._get_text(notification, 'sentTimeText'))
            upload_date = strftime_or_none(sent_time, '%Y%m%d')

        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from every notification page, following continuations."""
        continuation_list = [None]
        response = None
        page = 1
        while True:
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            # Visitor data is taken from the previous response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return
            page += 1

    def _real_extract(self, url):
        """Return a playlist named ``notifications`` built lazily from the menu pages."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)

6142

6143

6144

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the ``ytsearch`` key; declares configuration only."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = [
        {
            'url': 'ytsearch5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the ``ytsearchdate`` key; declares configuration only."""
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
    _TESTS = [
        {
            'url': 'ytsearchdate5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extractor for ``/results`` and ``/search`` URLs.

    The search text is read from the ``search_query`` (or ``q``) query
    parameter and the optional ``sp`` parameter is forwarded to
    ``_search_results``.
    """
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results whose id and title are the query text."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q` when both are present
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

6213

6214

6215

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extractor for music.youtube.com search URLs.

    A result section can be picked either through the ``sp`` query parameter
    or through a URL fragment naming one of the keys of ``_SECTIONS``.
    """

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music',
            'playlist_count': 16,
            'info_dict': {
                'id': 'royalty free music',
                'title': 'royalty free music',
            },
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - songs',
                'title': 'royalty free music - songs',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - community playlists',
                'title': 'royalty free music - community playlists',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
    ]

    # Section name (lower case, as written in the URL fragment) -> 'sp' search params
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of music search results for ``url``.

        The playlist id/title is the query, suffixed with `` - <section>``
        when a section could be determined.
        """
        query_args = parse_qs(url)
        query = (query_args.get('search_query') or query_args.get('q'))[0]
        search_params = query_args.get('sp', (None,))[0]

        if search_params:
            # Explicit 'sp': name the section after the first matching
            # _SECTIONS entry, or after the raw value when none matches.
            section = search_params
            for section_name, section_params in self._SECTIONS.items():
                if section_params == search_params:
                    section = section_name
                    break
        else:
            # No 'sp': read the section name from the text between the first
            # and second '#' of the URL (empty when there is no fragment).
            url_pieces = url.split('#')
            fragment = url_pieces[1] if len(url_pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            search_params = self._SECTIONS.get(section)
            if not search_params:
                # Unknown or absent section name: search without a section
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, search_params, default_client='web_music')
        return self.playlist_result(entries, title, title)

6266

6267

6268

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    Subclasses must override _FEED_NAME; it is used both for IE_NAME
    (``youtube:<name>``) and for the ``/feed/<name>`` URL handed to YoutubeTabIE.
    """

    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        """Extractor name derived from the feed name."""
        return f'youtube:{cls._FEED_NAME}'

    def _real_initialize(self):
        """Run the login-required check borrowed from YoutubeBaseInfoExtractor."""
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the feed page URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

6286

6287

6288

class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword extractor mapping ``:ytwatchlater`` to the ``WL`` playlist URL."""

    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the watch-later playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

6300

6301

6302

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``; unlike the base class, login is not required."""

    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    # Matches the bare youtube.com front page as well as the keywords
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{'url': test_url, 'only_matching': True} for test_url in (
        ':ytrec',
        ':ytrecommended',
        'https://youtube.com',
    )]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions``, reached through the ``:ytsubs`` keyword family."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{'url': test_url, 'only_matching': True} for test_url in (
        ':ytsubs',
        ':ytsubscriptions',
    )]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history``, reached through ``:ythis`` / ``:ythistory``."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]

class YoutubeStoriesIE(InfoExtractor):
    """Extractor for the ``ytstories:UC...`` pseudo-URL of a channel."""

    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {
            'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``RLTD<id>`` playlist, flagged as a story."""
        # The matched id is the channel id without its leading 'UC'
        playlist_id = 'RLTD%s' % self._match_id(url)
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        story_url = smuggle_url(playlist_url, {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)

6356

6357

6358

class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Extractor for ``/source/<video id>/shorts`` URLs."""

    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the sfv_audio_pivot browse params for ``video_id``.

        The encoded id is inserted three times into a fixed byte template;
        the result is base64-encoded and then URL-quoted.
        """
        id_bytes = video_id.encode()
        payload = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (id_bytes, id_bytes, id_bytes)
        encoded_payload = base64.b64encode(payload).decode()
        return urllib.parse.quote(encoded_payload)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the audio pivot feed URL for the matched video."""
        browse_params = self._generate_audio_pivot_params(self._match_id(url))
        pivot_url = f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}'
        return self.url_result(pivot_url, ie=YoutubeTabIE)

class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch/attribution URLs that end without a video id and reports them.

    Extraction never succeeds: ``_real_extract`` always raises an expected
    ExtractorError hinting that the URL was probably not quoted in the shell.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose pattern: a watch URL carrying at most one non-id parameter,
    # or an attribution_link with a single parameter, anchored at the end.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{'url': test_url, 'only_matching': True} for test_url in (
        'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'https://www.youtube.com/watch?',
        'https://www.youtube.com/watch?x-yt-cl=84503534',
        'https://www.youtube.com/watch?feature=foo',
        'https://www.youtube.com/watch?hl=en-GB',
        'https://www.youtube.com/watch?t=2372',
    )]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely quoting mistake."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extractor for ``youtube.com/clip/<id>`` URLs.

    The clip page is resolved to its base video, which is returned as a
    ``url_transparent`` result for YoutubeIE together with the clip's
    start/end offsets in seconds.
    """

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Resolve a clip URL to its base video plus section boundaries.

        Raises:
            ExtractorError: if the base video id or the clip's start/end
                times cannot be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First 'loopCommand' found under the clip engagement panel
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # clip_data is None when the path above does not resolve; read the
        # times defensively so a layout change is reported as an extractor
        # error instead of an opaque TypeError/KeyError/ValueError.
        start_ms = int_or_none(traverse_obj(clip_data, 'startTimeMs'))
        end_ms = int_or_none(traverse_obj(clip_data, 'endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start/end time')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Page data is in milliseconds; the result fields are in seconds
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }

class YoutubeTruncatedIDIE(InfoExtractor):
    """Matches watch URLs whose ``v`` value is 1-10 characters long and reports them.

    Extraction never succeeds: ``_real_extract`` always raises an expected
    ExtractorError naming the incomplete id.
    """

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError describing the truncated id."""
        truncated_id = self._match_id(url)
        message = f'Incomplete YouTube ID {truncated_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)