jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import enum
	6	import hashlib
	7	import itertools
	8	import json
	9	import math
	10	import os.path
	11	import random
	12	import re
	13	import sys
	14	import threading
	15	import time
	16	import traceback
	17	import urllib.error
	18	import urllib.parse
	19
	20	from .common import InfoExtractor, SearchInfoExtractor
	21	from .openload import PhantomJSwrapper
	22	from ..compat import functools
	23	from ..jsinterp import JSInterpreter
	24	from ..utils import (
	25	NO_DEFAULT,
	26	ExtractorError,
	27	LazyList,
	28	UserNotLive,
	29	bug_reports_message,
	30	classproperty,
	31	clean_html,
	32	datetime_from_str,
	33	dict_get,
	34	filter_dict,
	35	float_or_none,
	36	format_field,
	37	get_first,
	38	int_or_none,
	39	is_html,
	40	join_nonempty,
	41	js_to_json,
	42	mimetype2ext,
	43	network_exceptions,
	44	orderedSet,
	45	parse_codecs,
	46	parse_count,
	47	parse_duration,
	48	parse_iso8601,
	49	parse_qs,
	50	qualities,
	51	remove_start,
	52	smuggle_url,
	53	str_or_none,
	54	str_to_int,
	55	strftime_or_none,
	56	traverse_obj,
	57	try_get,
	58	unescapeHTML,
	59	unified_strdate,
	60	unified_timestamp,
	61	unsmuggle_url,
	62	update_url_query,
	63	url_or_none,
	64	urljoin,
	65	variadic,
	66	)
	67
	68	# any clients starting with _ cannot be explicitly requested by the user
	69	INNERTUBE_CLIENTS = {
	70	'web': {
	71	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	72	'INNERTUBE_CONTEXT': {
	73	'client': {
	74	'clientName': 'WEB',
	75	'clientVersion': '2.20220801.00.00',
	76	}
	77	},
	78	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	79	},
	80	'web_embedded': {
	81	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	82	'INNERTUBE_CONTEXT': {
	83	'client': {
	84	'clientName': 'WEB_EMBEDDED_PLAYER',
	85	'clientVersion': '1.20220731.00.00',
	86	},
	87	},
	88	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	89	},
	90	'web_music': {
	91	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	92	'INNERTUBE_HOST': 'music.youtube.com',
	93	'INNERTUBE_CONTEXT': {
	94	'client': {
	95	'clientName': 'WEB_REMIX',
	96	'clientVersion': '1.20220727.01.00',
	97	}
	98	},
	99	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	100	},
	101	'web_creator': {
	102	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	103	'INNERTUBE_CONTEXT': {
	104	'client': {
	105	'clientName': 'WEB_CREATOR',
	106	'clientVersion': '1.20220726.00.00',
	107	}
	108	},
	109	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	110	},
	111	'android': {
	112	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	113	'INNERTUBE_CONTEXT': {
	114	'client': {
	115	'clientName': 'ANDROID',
	116	'clientVersion': '17.31.35',
	117	'androidSdkVersion': 30,
	118	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	119	}
	120	},
	121	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	122	'REQUIRE_JS_PLAYER': False
	123	},
	124	'android_embedded': {
	125	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	126	'INNERTUBE_CONTEXT': {
	127	'client': {
	128	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	129	'clientVersion': '17.31.35',
	130	'androidSdkVersion': 30,
	131	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	132	},
	133	},
	134	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	135	'REQUIRE_JS_PLAYER': False
	136	},
	137	'android_music': {
	138	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	139	'INNERTUBE_CONTEXT': {
	140	'client': {
	141	'clientName': 'ANDROID_MUSIC',
	142	'clientVersion': '5.16.51',
	143	'androidSdkVersion': 30,
	144	'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
	145	}
	146	},
	147	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	148	'REQUIRE_JS_PLAYER': False
	149	},
	150	'android_creator': {
	151	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	152	'INNERTUBE_CONTEXT': {
	153	'client': {
	154	'clientName': 'ANDROID_CREATOR',
	155	'clientVersion': '22.30.100',
	156	'androidSdkVersion': 30,
	157	'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
	158	},
	159	},
	160	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	161	'REQUIRE_JS_PLAYER': False
	162	},
	163	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	164	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	165	'ios': {
	166	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	167	'INNERTUBE_CONTEXT': {
	168	'client': {
	169	'clientName': 'IOS',
	170	'clientVersion': '17.33.2',
	171	'deviceModel': 'iPhone14,3',
	172	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	173	}
	174	},
	175	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	176	'REQUIRE_JS_PLAYER': False
	177	},
	178	'ios_embedded': {
	179	'INNERTUBE_CONTEXT': {
	180	'client': {
	181	'clientName': 'IOS_MESSAGES_EXTENSION',
	182	'clientVersion': '17.33.2',
	183	'deviceModel': 'iPhone14,3',
	184	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	185	},
	186	},
	187	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	188	'REQUIRE_JS_PLAYER': False
	189	},
	190	'ios_music': {
	191	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	192	'INNERTUBE_CONTEXT': {
	193	'client': {
	194	'clientName': 'IOS_MUSIC',
	195	'clientVersion': '5.21',
	196	'deviceModel': 'iPhone14,3',
	197	'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	198	},
	199	},
	200	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	201	'REQUIRE_JS_PLAYER': False
	202	},
	203	'ios_creator': {
	204	'INNERTUBE_CONTEXT': {
	205	'client': {
	206	'clientName': 'IOS_CREATOR',
	207	'clientVersion': '22.33.101',
	208	'deviceModel': 'iPhone14,3',
	209	'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	210	},
	211	},
	212	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	213	'REQUIRE_JS_PLAYER': False
	214	},
	215	# mweb has 'ultralow' formats
	216	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	217	'mweb': {
	218	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	219	'INNERTUBE_CONTEXT': {
	220	'client': {
	221	'clientName': 'MWEB',
	222	'clientVersion': '2.20220801.00.00',
	223	}
	224	},
	225	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	226	},
	227	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	228	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	229	'tv_embedded': {
	230	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	231	'INNERTUBE_CONTEXT': {
	232	'client': {
	233	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	234	'clientVersion': '2.0',
	235	},
	236	},
	237	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	238	},
	239	}
	240
	241
	242	def _split_innertube_client(client_name):
	243	variant, *base = client_name.rsplit('.', 1)
	244	if base:
	245	return variant, base[0], variant
	246	base, *variant = client_name.split('_', 1)
	247	return client_name, base, variant[0] if variant else None
	248
	249
	250	def build_innertube_clients():
	251	THIRD_PARTY = {
	252	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	253	}
	254	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	255	priority = qualities(BASE_CLIENTS[::-1])
	256
	257	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	258	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	259	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	260	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	261	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	262
	263	_, base_client, variant = _split_innertube_client(client)
	264	ytcfg['priority'] = 10 * priority(base_client)
	265
	266	if not variant:
	267	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	268	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	269	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	270	embedscreen['priority'] -= 3
	271	elif variant == 'embedded':
	272	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	273	ytcfg['priority'] -= 2
	274	else:
	275	ytcfg['priority'] -= 3
	276
	277
	278	build_innertube_clients()
	279
	280
	281	class BadgeType(enum.Enum):
	282	AVAILABILITY_UNLISTED = enum.auto()
	283	AVAILABILITY_PRIVATE = enum.auto()
	284	AVAILABILITY_PUBLIC = enum.auto()
	285	AVAILABILITY_PREMIUM = enum.auto()
	286	AVAILABILITY_SUBSCRIPTION = enum.auto()
	287	LIVE_NOW = enum.auto()
	288
	289
	290	class YoutubeBaseInfoExtractor(InfoExtractor):
	291	"""Provide base functions for Youtube extractors"""
	292
	293	_RESERVED_NAMES = (
	294	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	295	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	296	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|source\|'
	297	r'storefront\|oops\|index\|account\|t/terms\|about\|upload\|signin\|logout')
	298
	299	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	300
	301	# _NETRC_MACHINE = 'youtube'
	302
	303	# If True it will raise an error if no login info is provided
	304	_LOGIN_REQUIRED = False
	305
	306	_INVIDIOUS_SITES = (
	307	# invidious-redirect websites
	308	r'(?:www\.)?redirect\.invidious\.io',
	309	r'(?:(?:www\|dev)\.)?invidio\.us',
	310	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	311	r'(?:www\.)?invidious\.pussthecat\.org',
	312	r'(?:www\.)?invidious\.zee\.li',
	313	r'(?:www\.)?invidious\.ethibox\.fr',
	314	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	315	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	316	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	317	# youtube-dl invidious instances list
	318	r'(?:(?:www\|no)\.)?invidiou\.sh',
	319	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	320	r'(?:www\.)?invidious\.kabi\.tk',
	321	r'(?:www\.)?invidious\.mastodon\.host',
	322	r'(?:www\.)?invidious\.zapashcanon\.fr',
	323	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	324	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	325	r'(?:www\.)?invidious\.himiko\.cloud',
	326	r'(?:www\.)?invidious\.reallyancient\.tech',
	327	r'(?:www\.)?invidious\.tube',
	328	r'(?:www\.)?invidiou\.site',
	329	r'(?:www\.)?invidious\.site',
	330	r'(?:www\.)?invidious\.xyz',
	331	r'(?:www\.)?invidious\.nixnet\.xyz',
	332	r'(?:www\.)?invidious\.048596\.xyz',
	333	r'(?:www\.)?invidious\.drycat\.fr',
	334	r'(?:www\.)?inv\.skyn3t\.in',
	335	r'(?:www\.)?tube\.poal\.co',
	336	r'(?:www\.)?tube\.connect\.cafe',
	337	r'(?:www\.)?vid\.wxzm\.sx',
	338	r'(?:www\.)?vid\.mint\.lgbt',
	339	r'(?:www\.)?vid\.puffyan\.us',
	340	r'(?:www\.)?yewtu\.be',
	341	r'(?:www\.)?yt\.elukerio\.org',
	342	r'(?:www\.)?yt\.lelux\.fi',
	343	r'(?:www\.)?invidious\.ggc-project\.de',
	344	r'(?:www\.)?yt\.maisputain\.ovh',
	345	r'(?:www\.)?ytprivate\.com',
	346	r'(?:www\.)?invidious\.13ad\.de',
	347	r'(?:www\.)?invidious\.toot\.koeln',
	348	r'(?:www\.)?invidious\.fdn\.fr',
	349	r'(?:www\.)?watch\.nettohikari\.com',
	350	r'(?:www\.)?invidious\.namazso\.eu',
	351	r'(?:www\.)?invidious\.silkky\.cloud',
	352	r'(?:www\.)?invidious\.exonip\.de',
	353	r'(?:www\.)?invidious\.riverside\.rocks',
	354	r'(?:www\.)?invidious\.blamefran\.net',
	355	r'(?:www\.)?invidious\.moomoo\.de',
	356	r'(?:www\.)?ytb\.trom\.tf',
	357	r'(?:www\.)?yt\.cyberhost\.uk',
	358	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	359	r'(?:www\.)?qklhadlycap4cnod\.onion',
	360	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	361	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	362	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	363	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	364	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	365	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	366	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	367	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	368	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	369	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	370	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	371	r'(?:www\.)?piped\.kavin\.rocks',
	372	r'(?:www\.)?piped\.tokhmi\.xyz',
	373	r'(?:www\.)?piped\.syncpundit\.io',
	374	r'(?:www\.)?piped\.mha\.fi',
	375	r'(?:www\.)?watch\.whatever\.social',
	376	r'(?:www\.)?piped\.garudalinux\.org',
	377	r'(?:www\.)?piped\.rivo\.lol',
	378	r'(?:www\.)?piped-libre\.kavin\.rocks',
	379	r'(?:www\.)?yt\.jae\.fi',
	380	r'(?:www\.)?piped\.mint\.lgbt',
	381	r'(?:www\.)?il\.ax',
	382	r'(?:www\.)?piped\.esmailelbob\.xyz',
	383	r'(?:www\.)?piped\.projectsegfau\.lt',
	384	r'(?:www\.)?piped\.privacydev\.net',
	385	r'(?:www\.)?piped\.palveluntarjoaja\.eu',
	386	r'(?:www\.)?piped\.smnz\.de',
	387	r'(?:www\.)?piped\.adminforge\.de',
	388	r'(?:www\.)?watch\.whatevertinfoil\.de',
	389	r'(?:www\.)?piped\.qdi\.fi',
	390	)
	391
	392	# extracted from account/account_menu ep
	393	# XXX: These are the supported YouTube UI and API languages,
	394	# which is slightly different from languages supported for translation in YouTube studio
	395	_SUPPORTED_LANG_CODES = [
	396	'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
	397	'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
	398	'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
	399	'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
	400	'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
	401	'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
	402	]
	403
	404	_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
	405
	406	@functools.cached_property
	407	def _preferred_lang(self):
	408	"""
	409	Returns a language code supported by YouTube for the user preferred language.
	410	Returns None if no preferred language set.
	411	"""
	412	preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
	413	if not preferred_lang:
	414	return
	415	if preferred_lang not in self._SUPPORTED_LANG_CODES:
	416	raise ExtractorError(
	417	f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
	418	expected=True)
	419	elif preferred_lang != 'en':
	420	self.report_warning(
	421	f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
	422	return preferred_lang
	423
	424	def _initialize_consent(self):
	425	cookies = self._get_cookies('https://www.youtube.com/')
	426	if cookies.get('__Secure-3PSID'):
	427	return
	428	consent_id = None
	429	consent = cookies.get('CONSENT')
	430	if consent:
	431	if 'YES' in consent.value:
	432	return
	433	consent_id = self._search_regex(
	434	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	435	if not consent_id:
	436	consent_id = random.randint(100, 999)
	437	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	438
	439	def _initialize_pref(self):
	440	cookies = self._get_cookies('https://www.youtube.com/')
	441	pref_cookie = cookies.get('PREF')
	442	pref = {}
	443	if pref_cookie:
	444	try:
	445	pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
	446	except ValueError:
	447	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	448	pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
	449	self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
	450
	451	def _real_initialize(self):
	452	self._initialize_pref()
	453	self._initialize_consent()
	454	self._check_login_required()
	455
	456	def _check_login_required(self):
	457	if self._LOGIN_REQUIRED and not self._cookies_passed:
	458	self.raise_login_required('Login details are needed to download this content', method='cookies')
	459
	460	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	461	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	462
	463	def _get_default_ytcfg(self, client='web'):
	464	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	465
	466	def _get_innertube_host(self, client='web'):
	467	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	468
	469	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	470	# try_get but with fallback to default ytcfg client values when present
	471	_func = lambda y: try_get(y, getter, expected_type)
	472	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	473
	474	def _extract_client_name(self, ytcfg, default_client='web'):
	475	return self._ytcfg_get_safe(
	476	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	477	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
	478
	479	def _extract_client_version(self, ytcfg, default_client='web'):
	480	return self._ytcfg_get_safe(
	481	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	482	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
	483
	484	def _select_api_hostname(self, req_api_hostname, default_client=None):
	485	return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
	486	or req_api_hostname or self._get_innertube_host(default_client or 'web'))
	487
	488	def _extract_api_key(self, ytcfg=None, default_client='web'):
	489	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
	490
	491	def _extract_context(self, ytcfg=None, default_client='web'):
	492	context = get_first(
	493	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	494	# Enforce language and tz for extraction
	495	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	496	client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	497	return context
	498
	499	_SAPISID = None
	500

1

import base64

import calendar

import copy

import datetime

import enum

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

21

from .openload import PhantomJSwrapper

22

from ..compat import functools

23

from ..jsinterp import JSInterpreter

24

from ..utils import (

NO_DEFAULT,

ExtractorError,

LazyList,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

filter_dict,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

69

INNERTUBE_CLIENTS = {

70

'web': {

71

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

72

'INNERTUBE_CONTEXT': {

73

'client': {

74

'clientName': 'WEB',

75

'clientVersion': '2.20220801.00.00',

76

}

77

},

78

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

79

},

80

'web_embedded': {

81

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

82

'INNERTUBE_CONTEXT': {

83

'client': {

84

'clientName': 'WEB_EMBEDDED_PLAYER',

85

'clientVersion': '1.20220731.00.00',

86

},

87

},

88

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

89

},

90

'web_music': {

91

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

92

'INNERTUBE_HOST': 'music.youtube.com',

93

'INNERTUBE_CONTEXT': {

94

'client': {

95

'clientName': 'WEB_REMIX',

96

'clientVersion': '1.20220727.01.00',

97

}

98

},

99

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

100

},

101

'web_creator': {

102

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

103

'INNERTUBE_CONTEXT': {

104

'client': {

105

'clientName': 'WEB_CREATOR',

106

'clientVersion': '1.20220726.00.00',

107

}

108

},

109

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

110

},

111

'android': {

112

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

113

'INNERTUBE_CONTEXT': {

114

'client': {

115

'clientName': 'ANDROID',

116

'clientVersion': '17.31.35',

117

'androidSdkVersion': 30,

118

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

119

}

120

},

121

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

122

'REQUIRE_JS_PLAYER': False

123

},

124

'android_embedded': {

125

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

126

'INNERTUBE_CONTEXT': {

127

'client': {

128

'clientName': 'ANDROID_EMBEDDED_PLAYER',

129

'clientVersion': '17.31.35',

130

'androidSdkVersion': 30,

131

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

132

},

133

},

134

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

135

'REQUIRE_JS_PLAYER': False

136

},

137

'android_music': {

138

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

139

'INNERTUBE_CONTEXT': {

140

'client': {

141

'clientName': 'ANDROID_MUSIC',

142

'clientVersion': '5.16.51',

143

'androidSdkVersion': 30,

144

'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'

145

}

146

},

147

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

148

'REQUIRE_JS_PLAYER': False

149

},

150

'android_creator': {

151

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

152

'INNERTUBE_CONTEXT': {

153

'client': {

154

'clientName': 'ANDROID_CREATOR',

155

'clientVersion': '22.30.100',

156

'androidSdkVersion': 30,

157

'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'

158

},

159

},

160

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

161

'REQUIRE_JS_PLAYER': False

162

},

163

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

164

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

165

'ios': {

166

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

167

'INNERTUBE_CONTEXT': {

168

'client': {

169

'clientName': 'IOS',

170

'clientVersion': '17.33.2',

171

'deviceModel': 'iPhone14,3',

172

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

173

}

174

},

175

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

176

'REQUIRE_JS_PLAYER': False

177

},

178

'ios_embedded': {

179

'INNERTUBE_CONTEXT': {

180

'client': {

181

'clientName': 'IOS_MESSAGES_EXTENSION',

182

'clientVersion': '17.33.2',

183

'deviceModel': 'iPhone14,3',

184

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

185

},

186

},

187

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

188

'REQUIRE_JS_PLAYER': False

189

},

190

'ios_music': {

191

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

192

'INNERTUBE_CONTEXT': {

193

'client': {

194

'clientName': 'IOS_MUSIC',

195

'clientVersion': '5.21',

196

'deviceModel': 'iPhone14,3',

197

'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

198

},

199

},

200

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

201

'REQUIRE_JS_PLAYER': False

202

},

203

'ios_creator': {

204

'INNERTUBE_CONTEXT': {

205

'client': {

206

'clientName': 'IOS_CREATOR',

207

'clientVersion': '22.33.101',

208

'deviceModel': 'iPhone14,3',

209

'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

210

},

211

},

212

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

213

'REQUIRE_JS_PLAYER': False

214

},

215

# mweb has 'ultralow' formats

216

# See: https://github.com/yt-dlp/yt-dlp/pull/557

217

'mweb': {

218

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

219

'INNERTUBE_CONTEXT': {

220

'client': {

221

'clientName': 'MWEB',

222

'clientVersion': '2.20220801.00.00',

223

}

224

},

225

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

226

},

227

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

228

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

229

'tv_embedded': {

230

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

231

'INNERTUBE_CONTEXT': {

232

'client': {

233

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

234

'clientVersion': '2.0',

235

},

236

},

237

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

243

variant, *base = client_name.rsplit('.', 1)

244

if base:

245

return variant, base[0], variant

246

base, *variant = client_name.split('_', 1)

247

return client_name, base, variant[0] if variant else None

248

249

250

def build_innertube_clients():

251

THIRD_PARTY = {

252

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

253

}

254

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

255

priority = qualities(BASE_CLIENTS[::-1])

256

257

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

258

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

259

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

260

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

261

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

262

263

_, base_client, variant = _split_innertube_client(client)

264

ytcfg['priority'] = 10 * priority(base_client)

265

266

if not variant:

267

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

268

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

269

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

270

embedscreen['priority'] -= 3

271

elif variant == 'embedded':

272

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

273

ytcfg['priority'] -= 2

274

else:

275

ytcfg['priority'] -= 3

276

277

278

build_innertube_clients()

279

280

281

class BadgeType(enum.Enum):

282

AVAILABILITY_UNLISTED = enum.auto()

283

AVAILABILITY_PRIVATE = enum.auto()

284

AVAILABILITY_PUBLIC = enum.auto()

285

AVAILABILITY_PREMIUM = enum.auto()

286

AVAILABILITY_SUBSCRIPTION = enum.auto()

287

LIVE_NOW = enum.auto()

288

289

290

class YoutubeBaseInfoExtractor(InfoExtractor):

291

"""Provide base functions for Youtube extractors"""

292

293

_RESERVED_NAMES = (

294

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

300

301

# _NETRC_MACHINE = 'youtube'

302

303

# If True it will raise an error if no login info is provided

304

_LOGIN_REQUIRED = False

305

306

_INVIDIOUS_SITES = (

307

# invidious-redirect websites

308

r'(?:www\.)?redirect\.invidious\.io',

309

r'(?:(?:www|dev)\.)?invidio\.us',

310

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

311

r'(?:www\.)?invidious\.pussthecat\.org',

312

r'(?:www\.)?invidious\.zee\.li',

313

r'(?:www\.)?invidious\.ethibox\.fr',

314

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

315

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

316

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

317

# youtube-dl invidious instances list

318

r'(?:(?:www|no)\.)?invidiou\.sh',

319

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

320

r'(?:www\.)?invidious\.kabi\.tk',

321

r'(?:www\.)?invidious\.mastodon\.host',

322

r'(?:www\.)?invidious\.zapashcanon\.fr',

323

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

324

r'(?:www\.)?invidious\.tinfoil-hat\.net',

325

r'(?:www\.)?invidious\.himiko\.cloud',

326

r'(?:www\.)?invidious\.reallyancient\.tech',

327

r'(?:www\.)?invidious\.tube',

328

r'(?:www\.)?invidiou\.site',

329

r'(?:www\.)?invidious\.site',

330

r'(?:www\.)?invidious\.xyz',

331

r'(?:www\.)?invidious\.nixnet\.xyz',

332

r'(?:www\.)?invidious\.048596\.xyz',

333

r'(?:www\.)?invidious\.drycat\.fr',

334

r'(?:www\.)?inv\.skyn3t\.in',

335

r'(?:www\.)?tube\.poal\.co',

336

r'(?:www\.)?tube\.connect\.cafe',

337

r'(?:www\.)?vid\.wxzm\.sx',

338

r'(?:www\.)?vid\.mint\.lgbt',

339

r'(?:www\.)?vid\.puffyan\.us',

340

r'(?:www\.)?yewtu\.be',

341

r'(?:www\.)?yt\.elukerio\.org',

342

r'(?:www\.)?yt\.lelux\.fi',

343

r'(?:www\.)?invidious\.ggc-project\.de',

344

r'(?:www\.)?yt\.maisputain\.ovh',

345

r'(?:www\.)?ytprivate\.com',

346

r'(?:www\.)?invidious\.13ad\.de',

347

r'(?:www\.)?invidious\.toot\.koeln',

348

r'(?:www\.)?invidious\.fdn\.fr',

349

r'(?:www\.)?watch\.nettohikari\.com',

350

r'(?:www\.)?invidious\.namazso\.eu',

351

r'(?:www\.)?invidious\.silkky\.cloud',

352

r'(?:www\.)?invidious\.exonip\.de',

353

r'(?:www\.)?invidious\.riverside\.rocks',

354

r'(?:www\.)?invidious\.blamefran\.net',

355

r'(?:www\.)?invidious\.moomoo\.de',

356

r'(?:www\.)?ytb\.trom\.tf',

357

r'(?:www\.)?yt\.cyberhost\.uk',

358

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

359

r'(?:www\.)?qklhadlycap4cnod\.onion',

360

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

361

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

362

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

363

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

364

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

365

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

366

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

367

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

368

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

369

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

370

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

371

r'(?:www\.)?piped\.kavin\.rocks',

372

r'(?:www\.)?piped\.tokhmi\.xyz',

373

r'(?:www\.)?piped\.syncpundit\.io',

374

r'(?:www\.)?piped\.mha\.fi',

375

r'(?:www\.)?watch\.whatever\.social',

376

r'(?:www\.)?piped\.garudalinux\.org',

377

r'(?:www\.)?piped\.rivo\.lol',

378

r'(?:www\.)?piped-libre\.kavin\.rocks',

379

r'(?:www\.)?yt\.jae\.fi',

380

r'(?:www\.)?piped\.mint\.lgbt',

381

r'(?:www\.)?il\.ax',

382

r'(?:www\.)?piped\.esmailelbob\.xyz',

383

r'(?:www\.)?piped\.projectsegfau\.lt',

384

r'(?:www\.)?piped\.privacydev\.net',

385

r'(?:www\.)?piped\.palveluntarjoaja\.eu',

386

r'(?:www\.)?piped\.smnz\.de',

387

r'(?:www\.)?piped\.adminforge\.de',

388

r'(?:www\.)?watch\.whatevertinfoil\.de',

389

r'(?:www\.)?piped\.qdi\.fi',

390

)

391

392

# extracted from account/account_menu ep

393

# XXX: These are the supported YouTube UI and API languages,

394

# which is slightly different from languages supported for translation in YouTube studio

395

_SUPPORTED_LANG_CODES = [

396

'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',

397

'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',

398

'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',

399

'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',

400

'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',

401

'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'

402

]

403

404

_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}

405

406

@functools.cached_property

407

def _preferred_lang(self):

408

"""

409

Returns a language code supported by YouTube for the user preferred language.

410

Returns None if no preferred language set.

411

"""

412

preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]

413

if not preferred_lang:

414

return

415

if preferred_lang not in self._SUPPORTED_LANG_CODES:

416

raise ExtractorError(

417

f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',

418

expected=True)

419

elif preferred_lang != 'en':

420

self.report_warning(

421

f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')

422

return preferred_lang

423

424

def _initialize_consent(self):

425

cookies = self._get_cookies('https://www.youtube.com/')

426

if cookies.get('__Secure-3PSID'):

427

return

428

consent_id = None

429

consent = cookies.get('CONSENT')

430

if consent:

431

if 'YES' in consent.value:

432

return

433

consent_id = self._search_regex(

434

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

435

if not consent_id:

436

consent_id = random.randint(100, 999)

437

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

438

439

def _initialize_pref(self):

440

cookies = self._get_cookies('https://www.youtube.com/')

441

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(urllib.parse.parse_qsl(pref_cookie.value))

446

except ValueError:

447

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

448

pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})

449

self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))

450

451

def _real_initialize(self):

452

self._initialize_pref()

453

self._initialize_consent()

454

self._check_login_required()

455

456

def _check_login_required(self):

457

if self._LOGIN_REQUIRED and not self._cookies_passed:

458

self.raise_login_required('Login details are needed to download this content', method='cookies')

459

460

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

461

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

462

463

def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in INNERTUBE_CLIENTS entry for `client`, safe to mutate."""
    client_defaults = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(client_defaults)

465

466

def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the built-in config for `client`."""
    # NOTE(review): the visible client literals do not define INNERTUBE_HOST;
    # presumably it is filled in elsewhere in the module - confirm
    client_defaults = INNERTUBE_CLIENTS[client]
    return client_defaults['INNERTUBE_HOST']

468

469

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get, but falling back to the default ytcfg of `default_client`
    when the given ytcfg yields no (truthy) value.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

473

474

def _extract_client_name(self, ytcfg, default_client='web'):

475

return self._ytcfg_get_safe(

476

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

477

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)

478

479

def _extract_client_version(self, ytcfg, default_client='web'):

480

return self._ytcfg_get_safe(

481

ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],

482

lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)

483

484

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname. Priority: the "innertube_host" extractor argument,
    then the requested hostname, then the default host of `default_client`
    (or of "web" when no client is given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

487

488

def _extract_api_key(self, ytcfg=None, default_client='web'):

489

return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)

490

491

def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the INNERTUBE_CONTEXT dict taken from ytcfg, or from the default
    config of `default_client`, with language and timezone forced.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context['hl'] = self._preferred_lang or 'en'
    client_context['timeZone'] = 'UTC'
    client_context['utcOffsetMinutes'] = 0
    return context

_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

502

time_now = round(time.time())

503

if self._SAPISID is None:

504

yt_cookies = self._get_cookies('https://www.youtube.com')

505

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

506

# See: https://github.com/yt-dlp/yt-dlp/issues/393

507

sapisid_cookie = dict_get(

508

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

509

if sapisid_cookie and sapisid_cookie.value:

510

self._SAPISID = sapisid_cookie.value

511

self.write_debug('Extracted SAPISID cookie')

512

# SAPISID cookie is required if not already present

513

if not yt_cookies.get('SAPISID'):

514

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

515

self._set_cookie(

516

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

517

else:

518

self._SAPISID = False

519

if not self._SAPISID:

520

return None

521

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

522

sapisidhash = hashlib.sha1(

523

f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()

524

return f'SAPISIDHASH {time_now}_{sapisidhash}'

525

526

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged with the innertube context) as JSON to the
    /youtubei/v1/<ep> endpoint and return the result of _download_json.

    The "innertube_key" extractor argument overrides `api_key`, which in turn
    overrides the default key of `default_client`. `headers` are applied on
    top of the generated API headers.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    host = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{host}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})

543

544

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON assigned to ytInitialData via _search_json."""
    pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(pattern, webpage, 'yt initial data', item_id, fatal=fatal)

546

547

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    Returns the first integer SESSION_INDEX found in the given ytcfgs, else None.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    """
    indices = (int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX'])) for ytcfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return ID_TOKEN from ytcfg if present, else search for it in the webpage; None if not found."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

568

569

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated

        parts = (try_get(source, datasync_getters, str) or '').split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)

597

598

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

601

602

def extract_ytcfg(self, video_id, webpage):
    """
    Extract the object passed to `ytcfg.set({...});` in `webpage`.

    @param video_id  id forwarded to _parse_json
    @param webpage   page contents; may be empty or None
    @returns         the parsed ytcfg dict, or {} when the webpage is empty,
                     no ytcfg.set(...) call is found, or parsing fails
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped literals: an unescaped "$"
    # anchor in their place can never match "ytcfg.set({...});"
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}

609

610

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an innertube API request.

    Explicit keyword arguments take precedence over values read from
    `ytcfg`. Authorization and X-Origin are added only when a SAPISIDHASH
    can be generated. The result is passed through filter_dict.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # An account syncid without a known session index is sent as index 0
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return filter_dict(headers)

635

636

def _download_ytcfg(self, client, video_id):

637

url = {

638

'web': 'https://www.youtube.com',

639

'web_music': 'https://music.youtube.com',

640

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(

645

url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')

646

return self.extract_ytcfg(video_id, webpage) or {}

647

648

@staticmethod

649

def _build_api_continuation_query(continuation, ctp=None):

650

query = {

651

'continuation': continuation

652

}

653

# TODO: Inconsistency with clickTrackingParams.

654

# Currently we have a fixed ctp contained within context (from ytcfg)

655

# and a ctp in root query for continuation.

656

if ctp:

657

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData or
    reloadContinuationData. Returns None when no continuation token exists.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

672

673

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.
    Returns None for non-dict input or when the endpoint has no token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

682

683

@classmethod
def _extract_continuation(cls, renderer):
    """
    Find the continuation query of a renderer: first from its next/reload
    continuation data, then from the first usable continuationItemRenderer
    endpoint among its contents/items/rows.
    """
    endpoint_paths = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, endpoint_paths, get_all=False, expected_type=cls._extract_continuation_ep_data)

693

694

@classmethod
def _extract_alerts(cls, data):
    """Yield (alert type, message text) for every alert in data['alerts'] that has both."""
    alert_containers = try_get(data, lambda x: x['alerts'], list) or []
    for container in alert_containers:
        if not isinstance(container, dict):
            continue
        for alert in container.values():
            kind = alert.get('type')
            if not kind:
                continue
            text = cls._get_text(alert, 'text')
            if text:
                yield kind, text

706

707

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

708

errors, warnings = [], []

709

for alert_type, alert_message in alerts:

710

if alert_type.lower() == 'error' and fatal:

711

errors.append([alert_type, alert_message])

712

elif alert_message not in self._IGNORED_WARNINGS:

713

warnings.append([alert_type, alert_message])

714

715

for alert_type, alert_message in (warnings + errors[:-1]):

716

self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

717

if errors:

718

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

719

720

def _extract_and_report_alerts(self, data, *args, **kwargs):

721

return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

722

723

def _extract_badges(self, renderer: dict):
    """
    Collect the badges of a renderer as a list of {'type': BadgeType} dicts.

    Each metadataBadgeRenderer is classified by its privacy icon type or its
    badge style. When neither is recognised, the badge label is matched
    against known English label fragments as a fallback.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Record the type matched by the label (badge_type is falsy here)
                # and stop at the first match so a badge is counted only once
                badges.append({'type': label_badge_type})
                break

    return badges

@staticmethod
def _has_badge(badges, badge_type):
    """Whether any badge in `badges` has the given type."""
    matching = traverse_obj(badges, lambda _, v: v['type'] == badge_type)
    return bool(matching)

766

767

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths of
    `data` (or in `data` itself when no path is given), else None.

    Text is taken from 'simpleText', or joined from the 'text' of the
    'runs' list (at most `max_runs` runs when given).
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only branching paths produce a list of results; wrap single results
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                # The item may itself be the list of runs
                runs = item

            run_limit = max_runs or len(runs)
            runs = runs[:min(len(runs), run_limit)]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined

def _get_count(self, data, *path_list):
    """
    Parse a count from the text at the given paths: via parse_count first,
    then from a leading run of digits/commas once whitespace is removed.
    """
    text = self._get_text(data, *path_list) or ''
    parsed = parse_count(text)
    if parsed is not None:
        return parsed

    compact_text = re.sub(r'\s', '', text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_digits)

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    """
    extracted = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            image_url = url_or_none(entry.get('url'))
            if not image_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in image_url:
                image_url = image_url.partition('?')[0]
            extracted.append({
                'url': image_url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return extracted

@staticmethod

def extract_relative_time(relative_time_text):

823

"""

824

Extracts a relative time from string and converts to dt object

825

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

"""

if mobj:

start = mobj.group('start')

830

if start:

831

return datetime_from_str(start)

832

try:

833

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _parse_time_text(self, text):
    """
    Convert a time text into a timestamp.

    Relative times ("6 days ago") are tried first, then absolute dates via
    unified_timestamp on the whole text and on a date-like fragment of it.
    Returns None for empty text or when nothing can be parsed; in the latter
    case a warning is emitted if the preferred language is unset or "en".
    """
    if not text:
        return

    timestamp = None
    relative_dt = self.extract_relative_time(text)
    if isinstance(relative_dt, datetime.datetime):
        timestamp = calendar.timegm(relative_dt.timetuple())

    if timestamp is None:
        date_patterns = (
            r'([a-z]+\s*\d{1,2},?\s*20\d{2})',
            r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)')
        timestamp = unified_timestamp(text) or unified_timestamp(
            self._search_regex(date_patterns, text.lower(), 'time text', default=None))

    if text and timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp

856

857

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API with retries and return the response.

    Retried: non-HTTP network errors, HTTP errors other than 403/429,
    "unknown error" alerts in the response, and responses in which
    `check_get_keys` yields nothing. Other failures are passed to
    _error_or_warning according to `fatal`.
    """
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            cause = err.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(cause, urllib.error.HTTPError):
                retry.error = err
                continue

            # Non-HTML error bodies may carry a JSON error message from YouTube
            body_start = cause.read(512)
            if not is_html(body_start):
                error_body = self._webpage_read_content(cause, None, item_id, prefix=body_start) or '{}'
                yt_error = try_get(
                    self._parse_json(error_body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)

            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.code not in (403, 429):
                retry.error = err
                continue
            return self._error_or_warning(err, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in err.msg.lower():
                retry.error = err
                continue
            return self._error_or_warning(err, fatal=fatal)

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response

@staticmethod

def is_music_url(url):

911

return re.match(r'https?://music\.youtube\.com/', url) is not None

912

913

def _extract_video(self, renderer):
    """
    Build a 'url'-type result dict for YoutubeIE from a video renderer.

    Metadata is read from the renderer itself and, for reel (shorts) items,
    from the reelPlayerHeaderRenderer nested in its navigation endpoint.
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not channel_id:
        channel_id = traverse_obj(reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    url = (f'https://www.youtube.com/shorts/{video_id}' if is_short
           else f'https://www.youtube.com/watch?v={video_id}')

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    # Live and upcoming items report the count under a different field name
    view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': (self._get_text(renderer, 'ownerText', 'shortBylineText')
                    or self._get_text(reel_header, 'channelTitleText')),
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        'timestamp': (self._parse_time_text(time_text)
                      if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
                      else None),
        'release_timestamp': scheduled_timestamp,
        'availability':
            'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            else self._availability(
                is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
                needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
                needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
                is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
        view_count_field: view_count,
        'live_status': live_status
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

998

IE_DESC = 'YouTube'

999

# Verbose-mode pattern for every URL shape this extractor accepts.
# Structure, as read from the pattern itself:
#   * optional group 1: scheme + one of the known hostnames (or a short-link
#     host such as youtu.be) + whatever path/query prefix precedes the ID;
#   * named group ``id``: the 11-character video ID;
#   * ``(?(1).+)?``: trailing text is only permitted when group 1 matched,
#     so a naked 11-character ID must stand alone.
# ``%(invidious)s`` is filled in with the alternation of
# ``YoutubeBaseInfoExtractor._INVIDIOUS_SITES`` (defined outside this block).
# NOTE: the pattern must not contain any other literal ``%`` because it is
# passed through %-formatting.
_VALID_URL = r"""(?x)^
                 (
                     (?:https?://|//)                                    # http(s):// or protocol-independent URL
                     (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                        (?:www\.)?deturl\.com/www\.youtube\.com|
                        (?:www\.)?pwnyoutube\.com|
                        (?:www\.)?hooktube\.com|
                        (?:www\.)?yourepeat\.com|
                        tube\.majestyc\.net|
                        %(invidious)s|
                        youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                     (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                     (?:                                                  # the various things that can precede the ID:
                         (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream))  # v/ or embed/ or e/ or shorts/
                         |(?:                                             # or the v= param in all its forms
                             (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                             (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                             (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
                             v=
                         )
                     ))
                     |(?:
                        youtu\.be|                                        # just youtu.be/xxxx
                        vid\.plus|                                        # or vid.plus/xxxx
                        zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                        %(invidious)s
                     )/
                     |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                     )
                 )?                                                       # all until now is optional -> you can pass the naked ID
                 (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                 (?(1).+)?                                                # if we found the ID, everything can follow
                 (?:\#|$)""" % {
    'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}

# Patterns used to locate embedded YouTube players inside arbitrary HTML.
# Every pattern exposes the embedded address through the named group ``url``.
_EMBED_REGEX = [
    # Generic embeds: an opening construct (iframe/embed/object tag,
    # ``data-video-url`` attribute, or an SWFObject JavaScript call), then a
    # quoted youtube(-nocookie).com embed/v/p URL. ``\1`` requires the closing
    # quote to be the same character as the opening one.
    r'''(?x)
        (?:
            <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''',
    # https://wordpress.org/plugins/lazy-load-for-videos/
    r'''(?xs)
        <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
        \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
]

# Patterns that pull a player identifier (named group ``id``) out of a
# player JavaScript URL. They are listed from the most specific URL layout
# to the loosest; how callers iterate over them is not visible in this block.
_PLAYER_INFO_RE = (
    # .../s/player/<id>/player...
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    # .../<id>/player_ias.vflset[/xx_YY]/base.js or the "plasma" phone/tablet variant
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    # any *.js URL containing a "vfl..." token
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)

1059

# Static per-format metadata keyed by itag (as a string). Each value is a
# partial format description (container extension, dimensions, codecs,
# bitrate, note, preference, ...). Only the fields known for a given itag are
# listed; how these entries are merged with live data is not visible in this
# block.
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # Dash mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # Dash webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # Dash webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # Dash webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
    '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
    '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
    '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
    '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}

1168

# Short name of this extractor.
IE_NAME = 'youtube'

# Geo-bypass is switched off for this extractor
# (presumably read by the base extractor class; not visible here).
_GEO_BYPASS = False

# Subtitle format identifiers handled by this extractor, in this fixed order.
_SUBTITLE_FORMATS = (
    'json3',
    'srv1',
    'srv2',
    'srv3',
    'ttml',
    'vtt',
)

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1180

'uploader': 'Philipp Hagemeister',

1181

'uploader_id': 'phihag',

1182

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1183

'channel': 'Philipp Hagemeister',

1184

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1185

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1186

'upload_date': '20121002',

1187

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1188

'categories': ['Science & Technology'],

1189

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1194

'playable_in_embed': True,

1195

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1196

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1201

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1206

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1211

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1212

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1213

'uploader': 'SET India',

1214

'uploader_id': 'setindia',

1215

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1216

'age_limit': 18,

1217

},

1218

'skip': 'Private video',

1219

},

1220

{

1221

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1222

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1227

'uploader': 'Philipp Hagemeister',

1228

'uploader_id': 'phihag',

1229

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1230

'channel': 'Philipp Hagemeister',

1231

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1232

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1233

'upload_date': '20121002',

1234

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1235

'categories': ['Science & Technology'],

1236

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1241

'playable_in_embed': True,

1242

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1243

'live_status': 'not_live',

1244

'age_limit': 0,

1245

'comment_count': int,

1246

'channel_follower_count': int

1247

},

1248

'params': {

1249

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1254

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1259

'uploader_id': '8KVIDEO',

1260

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1261

'description': '',

1262

'uploader': '8KVIDEO',

1263

'title': 'UHDTV TEST 8K VIDEO.mp4'

1264

},

1265

'params': {

1266

'youtube_include_dash_manifest': True,

1267

'format': '141',

1268

},

1269

'skip': 'format 141 not served anymore',

1270

},

1271

# DASH manifest with encrypted signature

1272

{

1273

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1278

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1279

'duration': 244,

1280

'uploader': 'AfrojackVEVO',

1281

'uploader_id': 'AfrojackVEVO',

1282

'upload_date': '20131011',

1283

'abr': 129.495,

1284

'like_count': int,

1285

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1286

'playable_in_embed': True,

1287

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1288

'view_count': int,

1289

'track': 'The Spark',

1290

'live_status': 'not_live',

1291

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1292

'channel': 'Afrojack',

1293

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1294

'tags': 'count:19',

1295

'availability': 'public',

1296

'categories': ['Music'],

1297

'age_limit': 0,

1298

'alt_title': 'The Spark',

1299

'channel_follower_count': int

1300

},

1301

'params': {

1302

'youtube_include_dash_manifest': True,

1303

'format': '141/bestaudio[ext=m4a]',

1304

},

1305

},

1306

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1307

{

1308

'note': 'Embed allowed age-gate video',

1309

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1314

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1315

'duration': 142,

1316

'uploader': 'The Witcher',

1317

'uploader_id': 'WitcherGame',

1318

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1319

'upload_date': '20140605',

1320

'age_limit': 18,

1321

'categories': ['Gaming'],

1322

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1323

'availability': 'needs_auth',

1324

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1325

'like_count': int,

1326

'channel': 'The Witcher',

1327

'live_status': 'not_live',

1328

'tags': 'count:17',

1329

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1330

'playable_in_embed': True,

1331

'view_count': int,

1332

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1337

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1342

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1343

'upload_date': '20200408',

1344

'uploader_id': 'FlyingKitty900',

1345

'uploader': 'FlyingKitty',

1346

'age_limit': 18,

1347

'availability': 'needs_auth',

1348

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1349

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1350

'channel': 'FlyingKitty',

1351

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1352

'view_count': int,

1353

'categories': ['Entertainment'],

1354

'live_status': 'not_live',

1355

'tags': ['Flyingkitty', 'godzilla 2'],

1356

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1357

'like_count': int,

1358

'duration': 177,

1359

'playable_in_embed': True,

1360

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1365

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1366

'info_dict': {

1367

'id': 'Tq92D6wQ1mg',

1368

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1369

'ext': 'mp4',

1370

'upload_date': '20191228',

1371

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1372

'uploader': 'Projekt Melody',

1373

'description': 'md5:17eccca93a786d51bc67646756894066',

1374

'age_limit': 18,

1375

'like_count': int,

1376

'availability': 'needs_auth',

1377

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1378

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1379

'view_count': int,

1380

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1381

'channel': 'Projekt Melody',

1382

'live_status': 'not_live',

1383

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1384

'playable_in_embed': True,

1385

'categories': ['Entertainment'],

1386

'duration': 106,

1387

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1388

'comment_count': int,

1389

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1394

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1399

'uploader': 'Herr Lurik',

1400

'uploader_id': 'st3in234',

1401

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1402

'upload_date': '20130730',

1403

'track': 'Such mich find mich',

1404

'age_limit': 0,

1405

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1406

'like_count': int,

1407

'playable_in_embed': False,

1408

'creator': 'OOMPH!',

1409

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1410

'view_count': int,

1411

'alt_title': 'Such mich find mich',

1412

'duration': 210,

1413

'channel': 'Herr Lurik',

1414

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1415

'categories': ['Music'],

1416

'availability': 'public',

1417

'uploader_url': 'http://www.youtube.com/user/st3in234',

1418

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1419

'live_status': 'not_live',

1420

'artist': 'OOMPH!',

1421

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1426

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1427

'only_matching': True,

1428

},

1429

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1430

# YouTube Red ad is not captured for creator

1431

{

1432

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1438

'uploader_id': 'deadmau5',

1439

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1440

'creator': 'deadmau5',

1441

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1442

'uploader': 'deadmau5',

1443

'title': 'Deadmau5 - Some Chords (HD)',

1444

'alt_title': 'Some Chords',

1445

'availability': 'public',

1446

'tags': 'count:14',

1447

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1448

'view_count': int,

1449

'live_status': 'not_live',

1450

'channel': 'deadmau5',

1451

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1452

'like_count': int,

1453

'track': 'Some Chords',

1454

'artist': 'deadmau5',

1455

'playable_in_embed': True,

1456

'age_limit': 0,

1457

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1458

'categories': ['Music'],

1459

'album': 'Some Chords',

1460

'channel_follower_count': int

1461

},

1462

'expected_warnings': [

1463

'DASH manifest missing',

1464

]

1465

},

1466

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1467

{

1468

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1474

'uploader_id': 'olympic',

1475

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1476

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1477

'uploader': 'Olympics',

1478

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1479

'like_count': int,

1480

'release_timestamp': 1343767800,

1481

'playable_in_embed': True,

1482

'categories': ['Sports'],

1483

'release_date': '20120731',

1484

'channel': 'Olympics',

1485

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1486

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1487

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1488

'age_limit': 0,

1489

'availability': 'public',

1490

'live_status': 'was_live',

1491

'view_count': int,

1492

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1493

'channel_follower_count': int

1494

},

1495

'params': {

1496

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1506

'duration': 85,

1507

'upload_date': '20110310',

1508

'uploader_id': 'AllenMeow',

1509

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1510

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1511

'uploader': '孫ᄋᄅ',

1512

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1513

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1518

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1519

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1520

'view_count': int,

1521

'categories': ['People & Blogs'],

1522

'like_count': int,

1523

'live_status': 'not_live',

1524

'availability': 'unlisted',

1525

'comment_count': int,

1526

'channel_follower_count': int

1527

},

1528

},

1529

# url_encoded_fmt_stream_map is empty string

1530

{

1531

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1536

'description': '',

1537

'upload_date': '20150404',

1538

'uploader_id': 'spbelect',

1539

'uploader': 'Наблюдатели Петербурга',

1540

},

1541

'params': {

1542

'skip_download': 'requires avconv',

1543

},

1544

'skip': 'This live event has ended.',

1545

},

1546

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1547

{

1548

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1553

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1554

'duration': 220,

1555

'upload_date': '20150625',

1556

'uploader_id': 'dorappi2000',

1557

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1558

'uploader': 'dorappi2000',

1559

'formats': 'mincount:31',

1560

},

1561

'skip': 'not actual anymore',

1562

},

1563

# DASH manifest with segment_list

1564

{

1565

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1566

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1571

'uploader': 'Airtek',

1572

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1573

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1574

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1575

},

1576

'params': {

1577

'youtube_include_dash_manifest': True,

1578

'format': '135', # bestvideo

1579

},

1580

'skip': 'This live event has ended.',

1581

},

1582

{

1583

# Multifeed videos (multiple cameras), URL is for Main Camera

1584

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1585

'info_dict': {

1586

'id': 'jvGDaLqkpTg',

1587

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1588

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1595

'description': 'md5:e03b909557865076822aa169218d6a5d',

1596

'duration': 10643,

1597

'upload_date': '20161111',

1598

'uploader': 'Team PGP',

1599

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1600

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1607

'description': 'md5:e03b909557865076822aa169218d6a5d',

1608

'duration': 10991,

1609

'upload_date': '20161111',

1610

'uploader': 'Team PGP',

1611

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1612

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1619

'description': 'md5:e03b909557865076822aa169218d6a5d',

1620

'duration': 10995,

1621

'upload_date': '20161111',

1622

'uploader': 'Team PGP',

1623

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1624

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1631

'description': 'md5:e03b909557865076822aa169218d6a5d',

1632

'duration': 10990,

1633

'upload_date': '20161111',

1634

'uploader': 'Team PGP',

1635

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1636

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1641

},

1642

'skip': 'Not multifeed anymore',

1643

},

1644

{

1645

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1646

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1647

'info_dict': {

1648

'id': 'gVfLd0zydlo',

1649

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1650

},

1651

'playlist_count': 2,

1652

'skip': 'Not multifeed anymore',

1653

},

1654

{

1655

'url': 'https://vid.plus/FlRa-iH7PGw',

1656

'only_matching': True,

1657

},

1658

{

1659

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1660

'only_matching': True,

1661

},

1662

{

1663

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1664

# Also tests cut-off URL expansion in video description (see

1665

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1666

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1667

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1672

'alt_title': 'Dark Walk',

1673

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1674

'duration': 133,

1675

'upload_date': '20151119',

1676

'uploader_id': 'IronSoulElf',

1677

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1678

'uploader': 'IronSoulElf',

1679

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1680

'track': 'Dark Walk',

1681

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1682

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1683

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1684

'categories': ['Film & Animation'],

1685

'view_count': int,

1686

'live_status': 'not_live',

1687

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1688

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1689

'tags': 'count:13',

1690

'availability': 'public',

1691

'channel': 'IronSoulElf',

1692

'playable_in_embed': True,

1693

'like_count': int,

1694

'age_limit': 0,

1695

'channel_follower_count': int

1696

},

1697

'params': {

1698

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1703

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1704

'only_matching': True,

1705

},

1706

{

1707

# Video with yt:stretch=17:0

1708

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1713

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1714

'upload_date': '20151107',

1715

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1716

'uploader': 'CH GAMER DROID',

1717

},

1718

'params': {

1719

'skip_download': True,

1720

},

1721

'skip': 'This video does not exist.',

1722

},

1723

{

1724

# Video with incomplete 'yt:stretch=16:'

1725

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1726

'only_matching': True,

1727

},

1728

{

1729

# Video licensed under Creative Commons

1730

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1735

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1736

'duration': 721,

1737

'upload_date': '20150128',

1738

'uploader_id': 'BerkmanCenter',

1739

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1740

'uploader': 'The Berkman Klein Center for Internet & Society',

1741

'license': 'Creative Commons Attribution license (reuse allowed)',

1742

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1743

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1744

'like_count': int,

1745

'age_limit': 0,

1746

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1747

'channel': 'The Berkman Klein Center for Internet & Society',

1748

'availability': 'public',

1749

'view_count': int,

1750

'categories': ['Education'],

1751

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1752

'live_status': 'not_live',

1753

'playable_in_embed': True,

1754

'comment_count': int,

1755

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

# Channel-like uploader_url

1764

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1769

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1770

'duration': 4060,

1771

'upload_date': '20151120',

1772

'uploader': 'Bernie Sanders',

1773

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1774

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1775

'license': 'Creative Commons Attribution license (reuse allowed)',

1776

'playable_in_embed': True,

1777

'tags': 'count:12',

1778

'like_count': int,

1779

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1780

'age_limit': 0,

1781

'availability': 'public',

1782

'categories': ['News & Politics'],

1783

'channel': 'Bernie Sanders',

1784

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1785

'view_count': int,

1786

'live_status': 'not_live',

1787

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1788

'comment_count': int,

1789

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1798

'only_matching': True,

1799

},

1800

{

1801

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1802

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1803

'only_matching': True,

1804

},

1805

{

1806

# Rental video preview

1807

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1812

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1813

'upload_date': '20150811',

1814

'uploader': 'FlixMatrix',

1815

'uploader_id': 'FlixMatrixKaravan',

1816

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1817

'license': 'Standard YouTube License',

1818

},

1819

'params': {

1820

'skip_download': True,

1821

},

1822

'skip': 'This video is not available.',

1823

},

1824

{

1825

# YouTube Red video with episode data

1826

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1831

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1832

'duration': 2085,

1833

'upload_date': '20170118',

1834

'uploader': 'Vsauce',

1835

'uploader_id': 'Vsauce',

1836

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1837

'series': 'Mind Field',

1838

'season_number': 1,

1839

'episode_number': 1,

1840

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1841

'tags': 'count:12',

1842

'view_count': int,

1843

'availability': 'public',

1844

'age_limit': 0,

1845

'channel': 'Vsauce',

1846

'episode': 'Episode 1',

1847

'categories': ['Entertainment'],

1848

'season': 'Season 1',

1849

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1850

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1851

'like_count': int,

1852

'playable_in_embed': True,

1853

'live_status': 'not_live',

1854

'channel_follower_count': int

1855

},

1856

'params': {

1857

'skip_download': True,

1858

},

1859

'expected_warnings': [

1860

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1865

# as inappropriate or offensive to some audiences.

1866

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1871

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1872

'duration': 965,

1873

'upload_date': '20140124',

1874

'uploader': 'New Century Foundation',

1875

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1876

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1877

},

1878

'params': {

1879

'skip_download': True,

1880

},

1881

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1886

'only_matching': True,

1887

},

1888

{

1889

# geo restricted to JP

1890

'url': 'sJL6WA-aGkQ',

1891

'only_matching': True,

1892

},

1893

{

1894

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1895

'only_matching': True,

1896

},

1897

{

1898

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1899

'only_matching': True,

1900

},

1901

{

1902

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1903

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1904

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1909

'only_matching': True,

1910

},

1911

{

1912

# Video with unsupported adaptive stream type formats

1913

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1918

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1919

'duration': 433,

1920

'upload_date': '20130923',

1921

'uploader': 'Amelia Putri Harwita',

1922

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1923

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1924

'formats': 'maxcount:10',

1925

},

1926

'params': {

1927

'skip_download': True,

1928

'youtube_include_dash_manifest': False,

1929

},

1930

'skip': 'not actual anymore',

1931

},

1932

{

1933

# Youtube Music Auto-generated description

1934

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1939

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1940

'upload_date': '20190312',

1941

'uploader': 'Stephen - Topic',

1942

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1943

'artist': 'Stephen',

1944

'track': 'Voyeur Girl',

1945

'album': 'it\'s too much love to know my dear',

1946

'release_date': '20190313',

1947

'release_year': 2019,

1948

'alt_title': 'Voyeur Girl',

1949

'view_count': int,

1950

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1951

'playable_in_embed': True,

1952

'like_count': int,

1953

'categories': ['Music'],

1954

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1955

'channel': 'Stephen',

1956

'availability': 'public',

1957

'creator': 'Stephen',

1958

'duration': 169,

1959

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1960

'age_limit': 0,

1961

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1962

'tags': 'count:11',

1963

'live_status': 'not_live',

1964

'channel_follower_count': int

1965

},

1966

'params': {

1967

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1972

'only_matching': True,

1973

},

1974

{

1975

# invalid -> valid video id redirection

1976

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1981

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1982

'upload_date': '20090125',

1983

'uploader': 'Prochorowka',

1984

'uploader_id': 'Prochorowka',

1985

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1986

'artist': 'Panjabi MC',

1987

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1988

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1989

},

1990

'params': {

1991

'skip_download': True,

1992

},

1993

'skip': 'Video unavailable',

1994

},

1995

{

1996

# empty description results in an empty string

1997

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

2004

'uploader_id': 'ElevageOrVert',

2005

'uploader': 'ElevageOrVert',

2006

'view_count': int,

2007

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

2008

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

2009

'like_count': int,

2010

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

2011

'tags': [],

2012

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

2013

'availability': 'public',

2014

'age_limit': 0,

2015

'categories': ['Pets & Animals'],

2016

'duration': 7,

2017

'playable_in_embed': True,

2018

'live_status': 'not_live',

2019

'channel': 'ElevageOrVert',

2020

'channel_follower_count': int

2021

},

2022

'params': {

2023

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

2028

# see [2] for an example with '};' inside ytInitialPlayerResponse

2029

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

2030

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

2031

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

2036

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

2037

'upload_date': '20130831',

2038

'uploader_id': 'kudvenkat',

2039

'uploader': 'kudvenkat',

2040

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

2041

'like_count': int,

2042

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

2043

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

2044

'live_status': 'not_live',

2045

'categories': ['Education'],

2046

'availability': 'public',

2047

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

2048

'tags': 'count:12',

2049

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

2054

'comment_count': int,

2055

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

2064

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

2065

'only_matching': True,

2066

},

2067

{

2068

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

2069

'only_matching': True,

2070

},

2071

{

2072

# https://github.com/ytdl-org/youtube-dl/pull/28094

2073

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

2079

'upload_date': '20141120',

2080

'uploader': 'The Cinematic Orchestra - Topic',

2081

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2082

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2083

'artist': 'The Cinematic Orchestra',

2084

'track': 'Burn Out',

2085

'album': 'Every Day',

2086

'like_count': int,

2087

'live_status': 'not_live',

2088

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2093

'creator': 'The Cinematic Orchestra',

2094

'channel': 'The Cinematic Orchestra',

2095

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

2096

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2097

'availability': 'public',

2098

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

2099

'categories': ['Music'],

2100

'playable_in_embed': True,

2101

'channel_follower_count': int

2102

},

2103

'params': {

2104

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

2109

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

2110

'only_matching': True,

2111

},

2112

{

2113

# controversial video, requires bpctr/contentCheckOk

2114

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

2119

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

2120

'uploader': 'CBS Mornings',

2121

'uploader_id': 'CBSThisMorning',

2122

'upload_date': '20140716',

2123

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

2124

'duration': 170,

2125

'categories': ['News & Politics'],

2126

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2127

'view_count': int,

2128

'channel': 'CBS Mornings',

2129

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2130

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2131

'age_limit': 18,

2132

'availability': 'needs_auth',

2133

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2134

'like_count': int,

2135

'live_status': 'not_live',

2136

'playable_in_embed': True,

2137

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2142

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2147

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2148

'upload_date': '20201120',

2149

'uploader': 'Walk around Japan',

2150

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2151

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2152

'duration': 1456,

2153

'categories': ['Travel & Events'],

2154

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2155

'view_count': int,

2156

'channel': 'Walk around Japan',

2157

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2158

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2159

'age_limit': 0,

2160

'availability': 'public',

2161

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2162

'live_status': 'not_live',

2163

'playable_in_embed': True,

2164

'channel_follower_count': int

2165

},

2166

'params': {

2167

'skip_download': True,

2168

},

2169

}, {

2170

# Has multiple audio streams

2171

'url': 'WaOKSUlf4TM',

2172

'only_matching': True

2173

}, {

2174

# Requires Premium: has format 141 when requested using YTM url

2175

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2176

'only_matching': True

2177

}, {

2178

# multiple subtitles with same lang_code

2179

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2180

'only_matching': True,

2181

}, {

2182

# Force use android client fallback

2183

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2184

'info_dict': {

2185

'id': 'YOelRv7fMxY',

2186

'title': 'DIGGING A SECRET TUNNEL Part 1',

2187

'ext': '3gp',

2188

'upload_date': '20210624',

2189

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2190

'uploader': 'colinfurze',

2191

'uploader_id': 'colinfurze',

2192

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2193

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2194

'duration': 596,

2195

'categories': ['Entertainment'],

2196

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2197

'view_count': int,

2198

'channel': 'colinfurze',

2199

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2200

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2201

'age_limit': 0,

2202

'availability': 'public',

2203

'like_count': int,

2204

'live_status': 'not_live',

2205

'playable_in_embed': True,

2206

'channel_follower_count': int,

'chapters': list,

},

'params': {

'format': '17', # 3gp format available on android

2211

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2216

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2217

'only_matching': True,

2218

'params': {

2219

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2224

'only_matching': True,

2225

}, {

2226

'note': 'Storyboards',

2227

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2233

'uploader_id': 'scishow',

2234

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2235

'upload_date': '20140324',

2236

'uploader': 'SciShow',

2237

'like_count': int,

2238

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2239

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2240

'view_count': int,

2241

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2242

'playable_in_embed': True,

2243

'tags': 'count:12',

2244

'uploader_url': 'http://www.youtube.com/user/scishow',

2245

'availability': 'public',

2246

'channel': 'SciShow',

2247

'live_status': 'not_live',

2248

'duration': 248,

2249

'categories': ['Education'],

2250

'age_limit': 0,

2251

'channel_follower_count': int,

2252

'chapters': list,

2253

}, 'params': {'format': 'mhtml', 'skip_download': True}

2254

}, {

2255

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2256

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2261

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2262

'uploader': 'Leon Nguyen',

2263

'uploader_id': 'VNSXIII',

2264

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2265

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2266

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2271

'tags': 'count:23',

2272

'playable_in_embed': True,

2273

'live_status': 'not_live',

2274

'upload_date': '20220103',

2275

'like_count': int,

2276

'availability': 'public',

2277

'channel': 'Leon Nguyen',

2278

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2279

'comment_count': int,

2280

'channel_follower_count': int

2281

}

2282

}, {

2283

# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date

2284

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2289

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2290

'uploader': 'Leon Nguyen',

2291

'uploader_id': 'VNSXIII',

2292

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2293

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2294

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2299

'tags': 'count:23',

2300

'playable_in_embed': True,

2301

'live_status': 'not_live',

2302

'upload_date': '20220102',

2303

'like_count': int,

2304

'availability': 'public',

2305

'channel': 'Leon Nguyen',

2306

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2307

'comment_count': int,

2308

'channel_follower_count': int

2309

},

2310

'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}

2311

}, {

2312

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2313

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2318

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2319

'uploader': 'Quackity',

2320

'uploader_id': 'QuackityHQ',

2321

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2322

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2323

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2328

'tags': 'count:26',

2329

'playable_in_embed': True,

2330

'live_status': 'not_live',

2331

'release_timestamp': 1641172509,

2332

'release_date': '20220103',

2333

'upload_date': '20220103',

2334

'like_count': int,

2335

'availability': 'public',

2336

'channel': 'Quackity',

2337

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2338

'channel_follower_count': int

2339

}

2340

},

2341

{ # continuous livestream. Microformat upload date should be preferred.

2342

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2343

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2344

'info_dict': {

2345

'id': 'kgx4WGK0oNU',

2346

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2347

'ext': 'mp4',

2348

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2349

'availability': 'public',

2350

'age_limit': 0,

2351

'release_timestamp': 1637975704,

2352

'upload_date': '20210619',

2353

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2354

'live_status': 'is_live',

2355

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2356

'uploader': '阿鲍Abao',

2357

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2358

'channel': 'Abao in Tokyo',

2359

'channel_follower_count': int,

2360

'release_date': '20211127',

2361

'tags': 'count:39',

2362

'categories': ['People & Blogs'],

2363

'like_count': int,

2364

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2365

'view_count': int,

2366

'playable_in_embed': True,

2367

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2368

'concurrent_view_count': int,

2369

},

2370

'params': {'skip_download': True}

2371

}, {

2372

# Story. Requires specific player params to work.

2373

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2378

'view_count': int,

2379

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2380

'upload_date': '20220526',

2381

'categories': ['Education'],

2382

'title': 'Story',

2383

'channel': 'IT\'S HISTORY',

2384

'description': '',

2385

'uploader_id': 'BlastfromthePast',

2386

'duration': 12,

2387

'uploader': 'IT\'S HISTORY',

2388

'playable_in_embed': True,

2389

'age_limit': 0,

2390

'live_status': 'not_live',

2391

'tags': [],

2392

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2393

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2394

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2395

},

2396

'skip': 'stories get removed after some period of time',

2397

}, {

2398

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2403

'upload_date': '20220323',

2404

'like_count': int,

2405

'availability': 'unlisted',

2406

'channel': 'nao20010128nao',

2407

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2408

'age_limit': 0,

2409

'uploader': 'nao20010128nao',

2410

'uploader_id': 'nao20010128nao',

2411

'categories': ['Music'],

2412

'view_count': int,

2413

'description': '',

2414

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2415

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2416

'live_status': 'not_live',

2417

'playable_in_embed': True,

2418

'channel_follower_count': int,

2419

'duration': 6,

2420

'tags': [],

2421

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

2422

}

2423

}, {

2424

# Prefer primary title+description language metadata by default

2425

# Do not prefer translated description if primary is empty

2426

'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',

'info_dict': {

'id': 'el3E4MbxRqQ',

'ext': 'mp4',

'title': 'dlp test video 2 - primary sv no desc',

2431

'description': '',

2432

'channel': 'cole-dlp-test-acc',

2433

'tags': [],

2434

'view_count': int,

2435

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2436

'like_count': int,

2437

'playable_in_embed': True,

2438

'availability': 'unlisted',

2439

'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',

2440

'age_limit': 0,

2441

'duration': 5,

2442

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2443

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2444

'live_status': 'not_live',

2445

'upload_date': '20220908',

2446

'categories': ['People & Blogs'],

2447

'uploader': 'cole-dlp-test-acc',

2448

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2449

},

2450

'params': {'skip_download': True}

2451

}, {

2452

# Extractor argument: prefer translated title+description

2453

'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',

'info_dict': {

'id': 'gHKT4uU8Zng',

'ext': 'mp4',

'channel': 'cole-dlp-test-acc',

2458

'tags': [],

2459

'duration': 5,

2460

'live_status': 'not_live',

2461

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2462

'upload_date': '20220728',

2463

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2464

'view_count': int,

2465

'categories': ['People & Blogs'],

2466

'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',

2467

'title': 'dlp test video title translated (fr)',

2468

'availability': 'public',

2469

'uploader': 'cole-dlp-test-acc',

2470

'age_limit': 0,

2471

'description': 'dlp test video description translated (fr)',

2472

'playable_in_embed': True,

2473

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2474

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2475

},

2476

'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},

2477

'expected_warnings': [r'Preferring "fr" translated fields'],

2478

}, {

2479

'note': '6 channel audio',

2480

'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',

2481

'only_matching': True,

}

]

# Test cases for third-party web pages that embed a YouTube video.
# Each entry gives the page 'url', the expected 'info_dict' fields and the
# 'params' to run with. A value of `int` is a type placeholder rather than a
# fixed number.
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        # NOTE(review): same digest as the description md5 below - confirm this is intended
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'uploader': 'Christopher Sykes',
            'uploader_id': 'ChristopherJSykes',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        },
        'params': {
            'skip_download': True,
        }
    },
]

@classmethod
def suitable(cls, url):
    """Decide whether this extractor should handle *url*.

    Returns False when the first value of the URL's ``list`` query
    parameter is truthy; otherwise returns whatever the parent class's
    ``suitable`` returns for the same URL.
    """
    # NOTE(review): function-scope import kept exactly as in the original;
    # presumably deliberate - confirm before hoisting it to module level.
    from ..utils import parse_qs

    qs = parse_qs(url)
    # A missing 'list' key falls back to [None], so the [0] lookup is safe
    if qs.get('list', [None])[0]:
        return False
    return super().suitable(url)

2529

2530

def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent initialiser, then create two empty caches."""
    super().__init__(*args, **kwargs)
    # Both caches start empty; how they are keyed and filled is outside this block
    self._code_cache = {}
    self._player_cache = {}

2534

2535

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach fragment sources to every format flagged ``is_from_start``.

    Only formats whose ``is_from_start`` value is truthy are processed; the
    format dicts are modified in place and nothing is returned.

    For each processed format:
      * ``is_live`` is set to the *is_live* argument;
      * when *is_live* is true, ``fragments`` becomes a
        ``functools.partial`` of ``self._live_dash_fragments`` and
        ``protocol`` is set to ``'http_dash_segments_generator'``;
      * otherwise ``fragments`` becomes a ``LazyList`` over that partial
        called with an empty dict, and the ``is_from_start`` key is removed.

    The partial is also given ``mpd_feed``, a callback that re-downloads the
    player responses (no sooner than a caller-supplied delay after the
    previous fetch) and returns the refreshed manifest data for a format.
    """
    lock = threading.Lock()
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download player responses unless the last fetch is at most *delay* seconds old."""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        # Rebinds the enclosing `formats`, so later mpd_feed lookups see the refreshed list
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        # Only the refetch itself is serialised through the lock
        with lock:
            refetch_manifest(format_id, delay)

        f = next((f for f in formats if f['format_id'] == format_id), None)
        if not f:
            if not is_live:
                self.to_screen(f'{video_id}: Video is no longer live')
            else:
                self.report_warning(
                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
            return None
        return f['manifest_url'], f['manifest_stream_number'], is_live

    for f in formats:
        f['is_live'] = is_live
        # Last argument is False while live, else a snapshot copy of the format dict
        gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
                                live_start_time, mpd_feed, not is_live and f.copy())
        if is_live:
            f['fragments'] = gen
            f['protocol'] = 'http_dash_segments_generator'
        else:
            f['fragments'] = LazyList(gen({}))
            del f['is_from_start']

2582

2583

def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """
    Generate fragment dicts for a live (or post-live manifestless) DASH format

    @param live_start_time        Start time of the stream, compared against the download start time
                                  to detect streams older than MAX_DURATION seconds
    @param mpd_feed               Callable (format_id, delay) returning
                                  (manifest_url, manifest_stream_number, is_live) or None
    @param manifestless_orig_fmt  Format dict used instead of fetching the MPD when truthy;
                                  generation then stops after the first pass
    @param ctx                    Dict; 'start' and 'last_error' are read from it
    @yields dicts with 'url' and 'fragment_count'
    """
    # FETCH_SPAN: minimum seconds between polls; MAX_DURATION: 432000s == 120 hours
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Must be bound before the helper below can run: the helper returns it as a
    # free variable, and the very first manifest fetch may already fail
    last_seq = 0

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """Refresh manifest state; returns (should_continue, last sequence number)"""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 2
                return False, last_seq
            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        # Give up once too many consecutive polls produced nothing new
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # The exception is only a vehicle to jump to the outer `continue`
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2693

2694

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfgs, or None"""
    js_path = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    # Resolve against the site root only when a path was actually found
    return urljoin('https://www.youtube.com', js_path) if js_path else None

2701

2702

def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the version referenced by the iframe API script"""
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None
    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'

2711

2712

def _signature_cache_id(self, example_sig):

2713

""" Return a string representation of a signature """

2714

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2715

2716

@classmethod
def _extract_player_info(cls, player_url):
    """Return the 'id' group of the first _PLAYER_INFO_RE pattern matching player_url

    Raises ExtractorError when no pattern matches.
    """
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    # Lazy: stops at the first successful match, like the equivalent for/break loop
    id_m = next(filter(None, attempts), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')

2725

2726

def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS code, downloading it once per player id"""
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    downloaded = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    # Only successful downloads are remembered
    if downloaded:
        self._code_cache[player_id] = downloaded
    return self._code_cache.get(player_id)

2736

2737

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that reorders a signature per the player's signature function

    The permutation is loaded from the 'youtube-sigfuncs' cache when available;
    otherwise it is derived from the player code and stored there.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    spec = self.cache.load('youtube-sigfuncs', func_id)

    if not spec:
        player_code = self._load_player(video_id, player_url)
        if player_code:
            sig_func = self._parse_sig_js(player_code)
            # Feed chr(0), chr(1), ... so each output character reveals its source index
            probe = ''.join(chr(n) for n in range(len(example_sig)))
            spec = list(map(ord, sig_func(probe)))
            self.cache.store('youtube-sigfuncs', func_id, spec)

    return lambda s: ''.join(s[i] for i in spec)

2756

2757

def _print_sig_code(self, func, example_sig):

2758

if not self.get_param('youtube_print_sig_code'):

2759

return

2760

2761

def gen_sig_code(idxs):

2762

def _genslice(start, end, step):

2763

starts = '' if start == 0 else str(start)

2764

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2765

steps = '' if step == 1 else (':%d' % step)

2766

return f's[{starts}{ends}{steps}]'

2767

2768

step = None

2769

# Quelch pyflakes warnings - start will be set when step is set

2770

start = '(Never used)'

2771

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2776

step = None

2777

continue

2778

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2788

2789

test_string = ''.join(map(chr, range(len(example_sig))))

2790

cache_res = func(test_string)

2791

cache_spec = [ord(c) for c in cache_res]

2792

expr_code = ' + '.join(gen_sig_code(cache_spec))

2793

signature_id_tuple = '(%s)' % (

2794

', '.join(str(len(p)) for p in example_sig.split('.')))

2795

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2796

' return %s\n') % (signature_id_tuple, expr_code)

2797

self.to_screen('Extracted signature function:\n' + code)

2798

2799

def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return a Python callable for it

    The returned callable takes the signature string and passes it to the
    interpreted JS function as its single argument.
    """
    # NB: parentheses that are literal in the JS must be escaped as \( and \);
    # an unescaped "$" in the middle of a pattern can never match
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])

2822

2823

def _cached(self, func, *cache_id):

2824

def inner(*args, **kwargs):

2825

if cache_id not in self._player_cache:

2826

try:

2827

self._player_cache[cache_id] = func(*args, **kwargs)

2828

except ExtractorError as e:

2829

self._player_cache[cache_id] = e

2830

except Exception as e:

2831

self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)

2832

2833

ret = self._player_cache[cache_id]

2834

if isinstance(ret, Exception):

raise ret

return ret

return inner

def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    # One cached extraction per (player, signature shape)
    cache_key = ('sig', player_url, self._signature_cache_id(s))
    cached_extractor = self._cached(self._extract_signature_function, *cache_key)
    sig_func = cached_extractor(video_id, player_url, s)
    self._print_sig_code(sig_func, s)
    return sig_func(s)

2846

2847

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as extraction_error:
        raise ExtractorError('Unable to extract nsig function code', cause=extraction_error)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        nsig_extractor = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        decrypted = nsig_extractor(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        # Native interpretation failed; fall back to PhantomJS when it can be set up
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e, only_once=True)

        args, func_body = func_code
        script = f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));'
        decrypted = jsi.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted

2880

2881

def _extract_n_function_name(self, jscode):

2882

funcname, idx = self._search_regex(

2883

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

2884

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

if not idx:

return funcname

return json.loads(js_to_json(self._search_regex(

2889

rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,

2890

f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]

2891

2892

def _extract_n_function_code(self, video_id, player_url):
    """Return (jsi, player_id, func_code) for the player's n-parameter function

    func_code is loaded from the 'youtube-nsig' cache when present; otherwise it
    is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # Literal JS parentheses must be escaped; an unescaped "$" mid-pattern never matches
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
                 # NB: The end of the regex is intentionally kept strict
                 {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name,
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Same shape as the jsinterp result below: (argument names, body)
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code

2917

2918

def _extract_n_function_from_code(self, jsi, func_code):
    """Build a Python callable running the n function; failures raise JSInterpreter.Exception"""
    js_func = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = js_func([s])
        except JSInterpreter.Exception:
            raise
        except Exception as err:
            # Wrap unexpected errors so callers only need to handle one exception type
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)

        if result.startswith('enhanced_except_'):
            raise JSInterpreter.Exception('Signature function returned an exception')
        return result

    return extract_nsig

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The ytcfg 'STS' value is preferred; the player JS is only consulted without it.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    player_code = self._load_player(video_id, player_url, fatal=fatal)
    if player_code:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts

2958

2959

def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs so the video counts as watched"""
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        tracking_url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                                 expected_type=url_or_none)
        if not tracking_url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(tracking_url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        extra_params = {
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        }
        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            extra_params.update({
                'st': 0,
                'et': video_length,
            })
        qs.update(extra_params)

        request_url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            request_url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

2999

3000

@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url results for YouTube videos referenced by a third-party webpage"""
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for lazy_id in lazy_ids:
        yield cls.url_result(unescapeHTML(lazy_id), cls, lazy_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_matches:
        # findall returns one tuple per match; the video id is the last group
        yield cls.url_result(groups[-1], cls, groups[-1])

3023

3024

@classmethod
def extract_id(cls, url):
    """Return the video id for url; raises ExtractorError when none can be determined"""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

3030

3031

def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar of the given data"""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapter_list = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        chapter_list, chapter_time=start_seconds, chapter_title=title_text, duration=duration)

3045

3046

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the first non-empty chapter list found in the engagement panels, else []"""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

3058

3059

def _extract_chapters_from_description(self, description, duration):
    """Parse chapters from description lines of the form "<time> <title>" or "<title> <time>" """
    duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
    sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
    text = description or ''

    # Timestamp first, title second
    time_first = self._extract_chapters(
        re.findall(sep_re % (duration_re, r'.+?'), text),
        chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
        duration=duration, strict=False)
    if time_first:
        return time_first

    # Title first, timestamp second
    return self._extract_chapters(
        re.findall(sep_re % (r'.+?', duration_re), text),
        chapter_time=lambda x: parse_duration(x[1]), chapter_title=lambda x: x[0],
        duration=duration, strict=False)

3069

3070

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

3075

'title': chapter_title(chapter),

3076

} for chapter in chapter_list or []]

3077

if not strict:

3078

chapter_list.sort(key=lambda c: c['start_time'] or 0)

3079

3080

chapters = [{'start_time': 0}]

3081

for idx, chapter in enumerate(chapter_list):

3082

if chapter['start_time'] is None:

3083

self.report_warning(f'Incomplete chapter {idx}')

3084

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

3085

chapters.append(chapter)

3086

elif chapter not in chapters:

3087

self.report_warning(

3088

f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')

3089

return chapters[1:]

3090

3091

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer into a comment dict; returns None without a commentId"""
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    body = self._get_text(comment_renderer, 'contentText')

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    estimated_timestamp = self._parse_time_text(time_text)

    author_name = self._get_text(comment_renderer, 'authorText')
    channel_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    raw_votes = try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']), str)
    like_count = parse_count(raw_votes) or 0
    avatar_url = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)

    by_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    hearted = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': body,
        'timestamp': estimated_timestamp,
        'time_text': time_text,
        'like_count': like_count,
        'is_favorited': hearted,
        'author': author_name,
        'author_id': channel_id,
        'author_thumbnail': avatar_url,
        'author_is_uploader': by_uploader,
        'parent': parent or 'root'
    }

3127

3128

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generate comment dicts by following comment continuations

    Calls itself recursively for reply threads, with `parent` set to the id of
    the parent comment and the same `tracker` passed along.

    @param root_continuation_data  Data the initial continuation is extracted from
    @param parent                  Id of the parent comment; None for the top-level call
    @param tracker                 Dict of running counters shared across recursive calls;
                                   created here when not given
    @raises CommentsDisabled       When a message renderer is present in the root data and
                                   no comment was extracted at the top level
    """

    # First value of an extractor argument, or '' when the argument is not given
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Record the expected comment count and return the continuation for the chosen sort order"""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments in `contents`, each followed by its replies; yields None once max_parents is reached"""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) is the stop signal checked by the consumer loop below
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by both the per-thread limit and what is left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Missing or non-integer limits fall back to sys.maxsize; max_comments itself is not used in this function
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
                self.report_warning(
                    'Received incomplete data for a comment reply thread and retrying did not help. '
                    'Ignoring to let other comments be downloaded.')
            else:
                raise
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section is only used for its header; comments come from the next request/section
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                # A falsy entry is the stop signal from extract_thread
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled

3284

3285

@staticmethod

3286

def _generate_comment_continuation(video_id):

3287

"""

3288

Generates initial comment section continuation token from given video id

3289

"""

3290

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

3291

return base64.b64encode(token.encode()).decode()

3292

3293

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _real_comment_extract(contents):
        # Locate the section that holds the comments, if any
        comment_section = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                comment_section = item
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    # First value of the max_comments extractor argument; None means no overall cap
    overall_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, overall_limit)

3303

3304

@staticmethod

3305

def _get_checkok_params():

3306

return {'contentCheckOk': True, 'racyCheckOk': True}

3307

3308

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback context part of a player query, including sts when given"""
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    player_context = {'playbackContext': {'contentPlaybackContext': playback}}
    player_context.update(cls._get_checkok_params())
    return player_context

@staticmethod
def _is_agegated(player_response):
    """Tell whether the playability status of a player response indicates an age gate"""
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Substring match of every known marker against every collected status/reason
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False

3333

3334

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of a player response is UNPLAYABLE"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3337

3338

# Sent as the 'params' field of the player query by _extract_player_response
# for story URLs and for the android client
_STORY_PLAYER_PARAMS = '8AEB'

3339

3340

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the player API response for the given client; returns None when it is empty"""
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    # The signature timestamp can only be looked up when a player URL is known
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    player_response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
    )
    return player_response or None

3361

3362

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the `player_client` extractor argument into a list of client names.

    Recognized values are the names of the user-selectable clients (those in
    INNERTUBE_CLIENTS not starting with '_'), 'default' and 'all'. Anything
    else is skipped with a warning. If nothing usable was requested, the
    default clients are used. For music URLs, the `<client>_music` variant of
    every selected client is added when such a client exists.

    @returns    De-duplicated client names (via orderedSet)
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if not client.startswith('_')),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that the list of defaults is never mutated below
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the list of music variants before extending, so that the
        # list is not being mutated while it is iterated over
        music_clients = [
            f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3385

3386

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for `video_id` from each of the given clients.

    Clients are tried in the given order; further clients may get queued
    while processing when a response is unplayable or age-gated. Errors are
    reported as warnings, except that the last error is raised when no
    player response could be obtained at all.

    @returns    (list of player responses, player_url)
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed, so that pop() hands out the clients in their original order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for candidate in client_names:
            actual_client = _split_innertube_client(candidate)[0]
            if actual_client not in INNERTUBE_CLIENTS or actual_client in all_clients:
                continue
            clients.append(candidate)
            all_clients.add(actual_client)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        if not player_url:
            player_url = self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The player URL download is attempted at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones are downgraded to warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            is_wrong_video = pr_video_id and pr_video_id != video_id
            if not is_wrong_video:
                prs.append(pr)
            else:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error and not prs:
        raise last_error
    if last_error:
        self.report_warning(last_error)
    return prs, player_url

3467

3468

def _needs_live_processing(self, live_status, duration):

3469

if (live_status == 'is_live' and self.get_param('live_from_start')

3470

or live_status == 'post_live' and (duration or 0) > 4 * 3600):

3471

return live_status

3472

3473

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generate the formats found in `streaming_data`, followed by the subtitles.

    Yields one format dict for every usable entry of 'formats' and
    'adaptiveFormats', then the formats of the HLS and DASH manifests
    (unless skipped), and as the very LAST item the dict of subtitles
    merged from the manifests. Callers need to split off that last item.

    @param streaming_data   Iterable of streamingData dicts
    @param player_url       URL of the JS player; needed to decrypt
                            signatures and the "n" parameter
    @param live_status      Live status of the video
    @param duration         Duration of the video in seconds, if known
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null value for audioQuality
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        if audio_track.get('audioIsDefault'):
            language_preference = 10
        elif 'descriptive' in (audio_track.get('displayName') or '').lower():
            language_preference = -10
        else:
            language_preference = -1
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality is None when the format has neither a quality nor an audioQuality
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, itag):
        """Set format_id and quality of a manifest format; return False if it is a duplicate"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality of the known format with the closest height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The subtitles are always the last item generated
    yield subtitles

def _extract_storyboard(self, player_responses, duration):
    """Generate a storyboard ("mhtml") format for each level of the storyboard spec.

    The spec is a '|'-separated string: the first part is the base URL
    and each further part is a '#'-separated description of one level.
    Nothing is generated if the base URL is missing or invalid; malformed
    levels are skipped with a warning.
    """
    raw_spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    base_part, *levels = raw_spec.split('|')
    base_url = url_or_none(urljoin('https://i.ytimg.com/', base_part or None))
    if not base_url:
        return
    # Levels are processed from last to first, so "sb0" is the last level of the spec
    levels.reverse()
    last_index = len(levels) - 1
    for i, level in enumerate(levels):
        fields = level.split('#')
        counts = [int_or_none(value) for value in fields[:5]]
        if len(fields) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        name, sigh = fields[6:]

        url = base_url.replace('$L', str(last_index - i)).replace('$N', name) + f'&sigh={sigh}'
        # Each fragment is one image holding up to cols * rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may cover less time than the others
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3714

3715

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    @returns    (webpage, master_ytcfg, player_responses, player_url)
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None
    if not skip_webpage:
        query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            query['pp'] = self._STORY_PLAYER_PARAMS
        # Non-fatal: extraction continues even if the page cannot be downloaded
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=query)

    # Fall back to the default ytcfg when none can be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url

3731

3732

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status of the video and extract its formats.

    @returns    (live_broadcast_details, live_status, streaming_data, formats, subtitles)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The first matching condition wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The last item generated is the subtitles; everything before it is a format
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    return live_broadcast_details, live_status, streaming_data, formats, subtitles

3750

3751

def _real_extract(self, url):

3752

url, smuggled_data = unsmuggle_url(url, {})

3753

video_id = self._match_id(url)

3754

3755

base_url = self.http_scheme() + '//www.youtube.com/'

3756

webpage_url = base_url + 'watch?v=' + video_id

3757

3758

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3759

3760

playability_statuses = traverse_obj(

3761

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3762

3763

trailer_video_id = get_first(

3764

playability_statuses,

3765

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3766

expected_type=str)

3767

if trailer_video_id:

3768

return self.url_result(

3769

trailer_video_id, self.ie_key(), trailer_video_id)

3770

3771

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3772

if webpage else (lambda x: None))

3773

3774

video_details = traverse_obj(

3775

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3776

microformats = traverse_obj(

3777

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3778

expected_type=dict, default=[])

3779

3780

translated_title = self._get_text(microformats, (..., 'title'))

3781

video_title = (self._preferred_lang and translated_title

3782

or get_first(video_details, 'title') # primary

3783

or translated_title

3784

or search_meta(['og:title', 'twitter:title', 'title']))

3785

translated_description = self._get_text(microformats, (..., 'description'))

3786

original_description = get_first(video_details, 'shortDescription')

3787

video_description = (

3788

self._preferred_lang and translated_description

3789

# If original description is blank, it will be an empty string.

3790

# Do not prefer translated description in this case.

3791

or original_description if original_description is not None else translated_description)

3792

3793

multifeed_metadata_list = get_first(

3794

player_responses,

3795

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3796

expected_type=str)

3797

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3798

if self.get_param('noplaylist'):

3799

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3804

# Unquote should take place before split on comma (,) since textual

3805

# fields may contain comma as well (see

3806

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3807

feed_data = urllib.parse.parse_qs(

3808

urllib.parse.unquote_plus(feed))

3809

3810

def feed_entry(name):

3811

return try_get(

3812

feed_data, lambda x: x[name][0], str)

3813

3814

feed_id = feed_entry('id')

3815

if not feed_id:

3816

continue

3817

feed_title = feed_entry('title')

3818

title = video_title

3819

if feed_title:

3820

title += ' (%s)' % feed_title

3821

entries.append({

3822

'_type': 'url_transparent',

3823

'ie_key': 'Youtube',

3824

'url': smuggle_url(

3825

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3826

{'force_singlefeed': True}),

3827

'title': title,

3828

})

3829

feed_ids.append(feed_id)

3830

self.to_screen(

3831

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3832

% (', '.join(feed_ids), video_id))

3833

return self.playlist_result(

3834

entries, video_id, video_title, video_description)

3835

3836

duration = (int_or_none(get_first(video_details, 'lengthSeconds'))

3837

or int_or_none(get_first(microformats, 'lengthSeconds'))

3838

or parse_duration(search_meta('duration')) or None)

3839

3840

live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \

3841

self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

3842

if live_status == 'post_live':

3843

self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

3844

3845

if not formats:

3846

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3847

self.report_drm(video_id)

3848

pemr = get_first(

3849

playability_statuses,

3850

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3851

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3852

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3853

if subreason:

3854

if subreason == 'The uploader has not made this video available in your country.':

3855

countries = get_first(microformats, 'availableCountries')

3856

if not countries:

3857

regions_allowed = search_meta('regionsAllowed')

3858

countries = regions_allowed.split(',') if regions_allowed else None

3859

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3860

reason += f'. {subreason}'

3861

if reason:

3862

self.raise_no_formats(reason, expected=True)

3863

3864

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3865

if not keywords and webpage:

3866

keywords = [

3867

unescapeHTML(m.group('content'))

3868

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3869

for keyword in keywords:

3870

if keyword.startswith('yt:stretch='):

3871

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3872

if mobj:

3873

# NB: float is intentional for forcing float division

3874

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3879

f['stretched_ratio'] = ratio

3880

break

3881

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3882

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3883

if thumbnail_url:

3884

thumbnails.append({

3885

'url': thumbnail_url,

3886

})

3887

original_thumbnails = thumbnails.copy()

3888

3889

# The best resolution thumbnails sometimes does not appear in the webpage

3890

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3891

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3892

thumbnail_names = [

3893

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

3894

# in resolution, these are not the custom thumbnail. So de-prioritize them

3895

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3896

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3897

]

3898

n_thumbnail_names = len(thumbnail_names)

3899

thumbnails.extend({

3900

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3901

video_id=video_id, name=name, ext=ext,

3902

webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),

3903

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3904

for thumb in thumbnails:

3905

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3906

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3907

self._remove_duplicate_formats(thumbnails)

3908

self._downloader._sort_thumbnails(original_thumbnails)

3909

3910

category = get_first(microformats, 'category') or search_meta('genre')

3911

channel_id = str_or_none(

3912

get_first(video_details, 'channelId')

3913

or get_first(microformats, 'externalChannelId')

3914

or search_meta('channelId'))

3915

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3916

3917

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3918

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3919

if not duration and live_end_time and live_start_time:

3920

duration = live_end_time - live_start_time

3921

3922

needs_live_processing = self._needs_live_processing(live_status, duration)

3923

3924

def is_bad_format(fmt):

3925

if needs_live_processing and not fmt.get('is_from_start'):

3926

return True

3927

elif (live_status == 'is_live' and needs_live_processing != 'is_live'

3928

and fmt.get('protocol') == 'http_dash_segments'):

3929

return True

3930

3931

for fmt in filter(is_bad_format, formats):

3932

fmt['preference'] = (fmt.get('preference') or -1) - 10

3933

fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')

3934

3935

if needs_live_processing:

3936

self._prepare_live_from_start_formats(

3937

formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

3938

3939

formats.extend(self._extract_storyboard(player_responses, duration))

3940

3941

# source_preference is lower for throttled/potentially damaged formats

3942

self._sort_formats(formats, (

3943

'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3948

'formats': formats,

3949

'thumbnails': thumbnails,

3950

# The best thumbnail that we are sure exists. Prevents unnecessary

3951

# URL checking if user don't care about getting the best possible thumbnail

3952

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3953

'description': video_description,

3954

'uploader': get_first(video_details, 'author'),

3955

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3956

'uploader_url': owner_profile_url,

3957

'channel_id': channel_id,

3958

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

3959

'duration': duration,

3960

'view_count': int_or_none(

3961

get_first((video_details, microformats), (..., 'viewCount'))

3962

or search_meta('interactionCount')),

3963

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3964

'age_limit': 18 if (

3965

get_first(microformats, 'isFamilySafe') is False

3966

or search_meta('isFamilyFriendly') == 'false'

3967

or search_meta('og:restrictions:age') == '18+') else 0,

3968

'webpage_url': webpage_url,

3969

'categories': [category] if category else None,

3970

'tags': keywords,

3971

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3972

'live_status': live_status,

3973

'release_timestamp': live_start_time,

}

subtitles = {}

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3978

if pctr:

3979

def get_lang_code(track):

3980

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3981

or track.get('languageCode'))

3982

3983

# Converted into dicts to remove duplicates

3984

captions = {

3985

get_lang_code(sub): sub

3986

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3987

translation_languages = {

3988

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3989

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3990

3991

def process_language(container, base_url, lang_code, sub_name, query):

3992

lang_subs = container.setdefault(lang_code, [])

3993

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

# NB: Constructing the full subtitle dictionary is slow

4004

get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (

4005

self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

4006

for lang_code, caption_track in captions.items():

4007

base_url = caption_track.get('baseUrl')

4008

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

4009

if not base_url:

4010

continue

4011

lang_name = self._get_text(caption_track, 'name', max_runs=1)

4012

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

4017

if not caption_track.get('isTranslatable'):

4018

continue

4019

for trans_code, trans_name in translation_languages.items():

4020

if not trans_code:

4021

continue

4022

orig_trans_code = trans_code

4023

if caption_track.get('kind') != 'asr':

4024

if not get_translated_subs:

4025

continue

4026

trans_code += f'-{lang_code}'

4027

trans_name += format_field(lang_name, None, ' from %s')

4028

# Add an "-orig" label to the original language so that it can be distinguished.

4029

# The subs are returned without "-orig" as well for compatibility

4030

if lang_code == f'a-{orig_trans_code}':

4031

process_language(

4032

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

4033

# Setting tlang=lang returns damaged subtitles.

4034

process_language(automatic_captions, base_url, trans_code, trans_name,

4035

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

4036

4037

info['automatic_captions'] = automatic_captions

4038

info['subtitles'] = subtitles

4039

4040

parsed_url = urllib.parse.urlparse(url)

4041

for component in [parsed_url.fragment, parsed_url.query]:

4042

query = urllib.parse.parse_qs(component)

4043

for k, v in query.items():

4044

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

4045

d_k += '_time'

4046

if d_k not in info and k in s_ks:

4047

info[d_k] = parse_duration(query[k][0])

4048

4049

# Youtube Music Auto-generated description

4050

if video_description:

4051

mobj = re.search(

4052

r'''(?xs)

4053

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

4054

(?P<album>[^\n]+)

4055

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

4056

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

4057

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

4058

.+\nAuto-generated\ by\ YouTube\.\s*$

4059

''', video_description)

4060

if mobj:

4061

release_year = mobj.group('release_year')

4062

release_date = mobj.group('release_date')

4063

if release_date:

4064

release_date = release_date.replace('-', '')

4065

if not release_year:

4066

release_year = release_date[:4]

4067

info.update({

4068

'album': mobj.group('album'.strip()),

4069

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

4070

'track': mobj.group('track').strip(),

4071

'release_date': release_date,

4072

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

4078

if not initial_data:

4079

query = {'videoId': video_id}

4080

query.update(self._get_checkok_params())

4081

initial_data = self._extract_response(

4082

item_id=video_id, ep='next', fatal=False,

4083

ytcfg=master_ytcfg, query=query,

4084

headers=self.generate_api_headers(ytcfg=master_ytcfg),

4085

note='Downloading initial data API JSON')

4086

4087

info['comment_count'] = traverse_obj(initial_data, (

4088

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

4089

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

4090

), (

4091

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

4092

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

4093

), expected_type=int_or_none, get_all=False)

4094

4095

try: # This will error if there is no livechat

4096

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

4097

except (KeyError, IndexError, TypeError):

4098

pass

4099

else:

4100

info.setdefault('subtitles', {})['live_chat'] = [{

4101

# url is needed to set cookies

4102

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

4103

'video_id': video_id,

4104

'ext': 'json',

4105

'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')

4106

else 'youtube_live_chat_replay'),

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

4112

or self._extract_chapters_from_engagement_panel(initial_data, duration)

4113

or self._extract_chapters_from_description(video_description, duration)

4114

or None)

4115

4116

contents = traverse_obj(

4117

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

4118

expected_type=list, default=[])

4119

4120

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

4121

if vpir:

4122

stl = vpir.get('superTitleLink')

4123

if stl:

4124

stl = self._get_text(stl)

4125

if try_get(

4126

vpir,

4127

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

4128

info['location'] = stl

4129

else:

4130

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

4131

if mobj:

4132

info.update({

4133

'series': mobj.group(1),

4134

'season_number': int(mobj.group(2)),

4135

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

list) or []):

tbrs = variadic(

traverse_obj(

tlb, 'toggleButtonRenderer',

4144

('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),

4145

default=[]))

4146

for tbr in tbrs:

4147

for getter, regex in [(

4148

lambda x: x['defaultText']['accessibility']['accessibilityData'],

4149

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

4150

lambda x: x['accessibility'],

4151

lambda x: x['accessibilityData']['accessibilityData'],

4152

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

4153

label = (try_get(tbr, getter, dict) or {}).get('label')

4154

if label:

4155

mobj = re.match(regex, label)

4156

if mobj:

4157

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

4158

break

4159

sbr_tooltip = try_get(

4160

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

4161

if sbr_tooltip:

4162

like_count, dislike_count = sbr_tooltip.split(' / ')

4163

info.update({

4164

'like_count': str_to_int(like_count),

4165

'dislike_count': str_to_int(dislike_count),

4166

})

4167

vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))

4168

if vcr:

4169

vc = self._get_count(vcr, 'viewCount')

4170

# Upcoming premieres with waiting count are treated as live here

4171

if vcr.get('isLive'):

4172

info['concurrent_view_count'] = vc

4173

elif info.get('view_count') is None:

4174

info['view_count'] = vc

4175

4176

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

4177

if vsir:

4178

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

4179

info.update({

4180

'channel': self._get_text(vor, 'title'),

4181

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

4186

list) or []

4187

multiple_songs = False

4188

for row in rows:

4189

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

4190

multiple_songs = True

4191

break

4192

for row in rows:

4193

mrr = row.get('metadataRowRenderer') or {}

4194

mrr_title = mrr.get('title')

4195

if not mrr_title:

4196

continue

4197

mrr_title = self._get_text(mrr, 'title')

4198

mrr_contents_text = self._get_text(mrr, ('contents', 0))

4199

if mrr_title == 'License':

4200

info['license'] = mrr_contents_text

4201

elif not multiple_songs:

4202

if mrr_title == 'Album':

4203

info['album'] = mrr_contents_text

4204

elif mrr_title == 'Artist':

4205

info['artist'] = mrr_contents_text

4206

elif mrr_title == 'Song':

4207

info['track'] = mrr_contents_text

4208

4209

fallbacks = {

4210

'channel': 'uploader',

4211

'channel_id': 'uploader_id',

4212

'channel_url': 'uploader_url',

4213

}

4214

4215

# The upload date for scheduled, live and past live streams / premieres in microformats

4216

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

4217

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

4218

upload_date = (

4219

unified_strdate(get_first(microformats, 'uploadDate'))

4220

or unified_strdate(search_meta('uploadDate')))

4221

if not upload_date or (

4222

live_status in ('not_live', None)

4223

and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])

4224

):

4225

upload_date = strftime_or_none(

4226

self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date

4227

info['upload_date'] = upload_date

4228

4229

for to, frm in fallbacks.items():

4230

if not info.get(to):

4231

info[to] = info.get(frm)

4232

4233

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))

4239

4240

is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)

4241

or get_first(video_details, 'isPrivate', expected_type=bool))

4242

4243

info['availability'] = (

4244

'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)

4245

else self._availability(

4246

is_private=is_private,

4247

needs_premium=(

4248

self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)

4249

or False if initial_data and is_private is not None else None),

4250

needs_subscription=(

4251

self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)

4252

or False if initial_data and is_private is not None else None),

4253

needs_auth=info['age_limit'] >= 18,

4254

is_unlisted=None if is_private is None else (

4255

self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)

4256

or get_first(microformats, 'isUnlisted', expected_type=bool))))

4257

4258

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

4259

4260

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

4266

@staticmethod

4267

def passthrough_smuggled_data(func):
    """Decorate an extraction method so smuggled data is passed on to its results.

    The wrapped method is invoked as ``func(self, url, smuggled_data)`` with the
    un-smuggled URL. Afterwards the smuggled data is re-attached to the returned
    info dict and to each of its ``entries`` that is a ``url``/``url_transparent``
    result. When ``is_music_url`` is set, www/music YouTube URLs are rewritten to
    the ``music.youtube.com`` host instead of carrying the flag along.
    """
    def _smuggle(info, smuggled_data):
        if info.get('_type') not in ('url', 'url_transparent'):
            return info
        # Work on a private copy: the same smuggled_data dict is shared by every
        # entry, and popping 'is_music_url' from it directly would leave all
        # entries after the first one without the music host rewrite.
        smuggled_data = dict(smuggled_data)
        if smuggled_data.get('is_music_url'):
            parsed_url = urllib.parse.urlparse(info['url'])
            if parsed_url.netloc in ('www.youtube.com', 'music.youtube.com'):
                # The flag is expressed through the host name, so it need not be smuggled
                smuggled_data.pop('is_music_url')
                info['url'] = urllib.parse.urlunparse(parsed_url._replace(netloc='music.youtube.com'))
        if smuggled_data:
            info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = func(self, url, smuggled_data)
        if smuggled_data:
            _smuggle(info_dict, smuggled_data)
            if info_dict.get('entries'):
                # Lazily evaluated; each entry gets its own copy inside _smuggle
                info_dict['entries'] = (_smuggle(i, smuggled_data) for i in info_dict['entries'])
        return info_dict
    return wrapper

def _extract_channel_id(self, webpage):

4294

channel_id = self._html_search_meta(

4295

'channelId', webpage, 'channel id', default=None)

4296

if channel_id:

4297

return channel_id

4298

channel_url = self._html_search_meta(

4299

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

4300

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

4301

'twitter:app:url:googleplay'), webpage, 'channel url')

4302

return self._search_regex(

4303

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

4304

channel_url, 'channel id')

4305

4306

@staticmethod

4307

def _extract_basic_item_renderer(item):

4308

# Modified from _extract_grid_item_renderer

4309

known_basic_renderers = (

4310

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

4311

)

4312

for key, renderer in item.items():

4313

if not isinstance(renderer, dict):

4314

continue

4315

elif key in known_basic_renderers:

4316

return renderer

4317

elif key.startswith('grid') and key.endswith('Renderer'):

4318

return renderer

4319

4320

def _grid_entries(self, grid_renderer):
    """Yield one result per usable item in ``grid_renderer['items']``.

    Each item's basic renderer is inspected for, in order, a playlist ID, a
    video ID and a channel ID. If none is present, the renderer's navigation
    endpoint URL is handed to the first extractor that accepts it.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue
        item_title = self._get_text(item_renderer, 'title')

        playlist_id = item_renderer.get('playlistId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=item_title)
        elif item_renderer.get('videoId'):
            # video
            yield self._extract_video(item_renderer)
        elif item_renderer.get('channelId'):
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % item_renderer['channelId'],
                ie=YoutubeTabIE.ie_key(), video_title=item_title)
        else:
            # generic endpoint URL support
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                item_renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str))
            if not endpoint_url:
                continue
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(endpoint_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    endpoint_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(endpoint_url), video_title=item_title)

4359

4360

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for a music list item renderer.

    Looks, in order, for a playlist-item video ID, a watch endpoint playlist
    (optionally with a video ID) and a browse endpoint ID. Returns None when
    none of them is present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    watch_endpoint_path = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, (*watch_endpoint_path, 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, (*watch_endpoint_path, 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)

4377

4378

def _shelf_entries_from_content(self, shelf_renderer):

4379

content = shelf_renderer.get('content')

4380

if not isinstance(content, dict):

4381

return

4382

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

4383

if renderer:

4384

# TODO: add support for nested playlists so each shelf is processed

4385

# as separate playlist

4386

# TODO: this includes only first N items

4387

yield from self._grid_entries(renderer)

4388

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's endpoint, then the shelf's inline entries.

    When *skip_channels* is true and the shelf URL contains ``/channels?``,
    nothing is yielded at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)

    # Skipping links to other channels; note that checking for
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return

    if shelf_url:
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)

    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

4408

4409

def _playlist_entries(self, video_list_renderer):

4410

for content in video_list_renderer['contents']:

4411

if not isinstance(content, dict):

4412

continue

4413

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

4414

if not isinstance(renderer, dict):

4415

continue

4416

video_id = renderer.get('videoId')

4417

if not video_id:

4418

continue

4419

yield self._extract_video(renderer)

4420

4421

def _rich_entries(self, rich_grid_renderer):
    """Yield the extracted video of a rich item, if it has one.

    The video or reel renderer is looked up under ``content``; nothing is
    yielded when no renderer with a ``videoId`` is found.
    """
    video_renderer = traverse_obj(
        rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4428

4429

def _video_entry(self, video_renderer):

4430

video_id = video_renderer.get('videoId')

4431

if video_id:

4432

return self._extract_video(video_renderer)

4433

4434

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab URL result for a hashtag tile, or None if it has no usable URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4440

4441

def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a backstage (community) post.

    In order: the attached video, the attached playlist, and every video URL
    linked from the post text other than the attached video itself.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_video_id = YoutubeIE._match_id(link_url)
        # The attached video was already yielded above
        if attached_video_id == linked_video_id:
            continue
        yield self.url_result(link_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)

4476

4477

def _post_thread_continuation_entries(self, post_thread_continuation):

4478

contents = post_thread_continuation.get('contents')

4479

if not isinstance(contents, list):

4480

return

4481

for content in contents:

4482

renderer = content.get('backstagePostThreadRenderer')

4483

if isinstance(renderer, dict):

4484

yield from self._post_thread_entries(renderer)

4485

continue

4486

renderer = content.get('videoRenderer')

4487

if isinstance(renderer, dict):

4488

yield self._video_entry(renderer)

4489

4490

r''' # unused

4491

def _rich_grid_entries(self, contents):

4492

for content in contents:

4493

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4494

if video_renderer:

4495

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _report_history_entries(self, renderer):
    """Yield a YouTube video URL result for every link in a report history table."""
    link_url_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, link_url_path):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)

4506

4507

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    *continuation_list* is a one-element list used as an output parameter: it
    is reset to ``[None]`` and then updated in place with the continuation
    found while walking the renderers.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    entry_extractors = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for section in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(section, dict):
            continue
        section_renderer = traverse_obj(
            section, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)

        if not section_renderer:
            if section.get('richItemRenderer'):
                yield from self._rich_entries(section['richItemRenderer'])
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif section.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(section, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for section_item in try_get(section_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first recognised renderer of each item is processed
            for renderer_name, renderer in section_item.items():
                if renderer_name not in entry_extractors:
                    continue
                yield from filter(None, entry_extractors[renderer_name](renderer))
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4559

4560

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of *tab*, following continuations page by page.

    The entries embedded in the tab content are yielded first. After that,
    continuation responses are requested until no continuation is left, a
    request returns nothing, or a response contains no known renderer.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Reads the current binding of continuation_list on every call
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # renderer key -> (entry generator, key to wrap the continuation items in)
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }

    page_numbers = itertools.count(1)
    while continuation:
        page_num = next(page_numbers)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for renderer_key in continuation_item.keys():
            if renderer_key not in known_renderers:
                continue
            entry_func, parent_key = known_renderers[renderer_key]
            if parent_key:
                video_items_renderer = {parent_key: continuation_items}
            else:
                video_items_renderer = continuation_items
            continuation_list = [None]
            yield from entry_func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        if not video_items_renderer:
            break

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4625

for tab_renderer in tabs:

4626

if tab_renderer.get('selected'):

4627

return tab_renderer

4628

if fatal:

4629

raise ExtractorError('Unable to find selected tab')

4630

4631

@staticmethod

4632

def _extract_tab_renderers(response):
    """Collect the tab and expandable-tab renderer dicts of a browse response."""
    tab_renderer_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_renderer_path, expected_type=dict)

4635

4636

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Return a playlist result for the selected tab of a tabbed page.

    The playlist title is the page title followed by the selected tab's
    ``title`` and ``expandedText``, each prefixed with `` - `` when present.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)

    selected_tab = self._extract_selected_tab(tabs)
    for tab_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, tab_field, ' - %s')

    entries = self._entries(
        selected_tab, metadata['id'], ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4649

4650

def _extract_metadata_from_tabs(self, item_id, data):
    """Build the metadata dict (title, uploader, thumbnails, counts, ...) of a tabbed page.

    Channel metadata is preferred; playlist metadata is used when no channel
    metadata renderer is present. The ``id`` defaults to *item_id* and is
    replaced by the channel's external ID when one is available.
    """
    info = {'id': item_id}

    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if not metadata_renderer:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
    else:
        info['uploader'] = metadata_renderer.get('title')
        info['uploader_id'] = metadata_renderer.get('externalId')
        info['uploader_url'] = metadata_renderer.get('channelUrl')
        if info['uploader_id']:
            info['id'] = info['uploader_id']

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # Derive the uncropped variant from the first thumbnail, if there is one
        if not thumbnails:
            return
        uncropped_url = _get_uncropped(thumbnails[0]['url'])
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer,
        ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info.update({
        'title': (traverse_obj(metadata_renderer, 'title')
                  or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
                  or info['id']),
        'availability': self._extract_availability(data),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_text = (
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(self._parse_time_text(last_updated_text), '%Y%m%d')

    view_count = self._get_count(playlist_stats, 1)
    info['view_count'] = view_count
    if view_count is None:  # 0 is allowed
        info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')

    playlist_count = self._get_count(playlist_stats, 0)
    info['playlist_count'] = playlist_count
    if playlist_count is None:  # 0 is allowed
        info['playlist_count'] = self._get_count(
            playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))

    if not info.get('uploader_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'uploader_id': browse_ep.get('browseId'),
            'uploader_url': urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))
        })

    # The channel fields mirror the uploader fields
    info.update({
        'channel': info['uploader'],
        'channel_id': info['uploader_id'],
        'channel_url': info['uploader_url']
    })
    return info

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline playlist, requesting further pages as needed.

    The entries of the given playlist renderer are yielded first. Each following
    page is requested from the 'next' endpoint, anchored on the last entry seen,
    and iteration stops once a page is empty or contains nothing new.

    @param playlist: playlist renderer holding the current page of entries
    @param playlist_id: ID sent as 'playlistId' and used in the progress message
    @param data: initial response; used for the account sync ID and visitor data
    @param ytcfg: ytcfg used for the API headers and requests
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # A page may repeat entries that were already yielded: resume right
        # after the last ID seen, or from the start when it is not present
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The lookup may come back empty; fall back to {} so that the defaults
        # in the query below apply instead of failing on `.get`
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint']) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

4783

4784

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist renderer found in a response.

    Returns a url_result pointing at the playlist's own URL (for YoutubeTabIE)
    when the renderer links to one that differs from `url` and the playlist ID
    is not a known unviewable one; otherwise returns a playlist_result whose
    entries come from _extract_inline_playlist.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, endpoint_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not should_delegate:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)

4806

4807

def _extract_availability(self, data):
    """
    Determine the availability of a given playlist/tab.
    Note: 'public' is only reported when the response says so explicitly
    (badge, header privacy or selected privacy dropdown entry)
    @param data: response
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(sidebar_renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    selected_icon_path = (
        'privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
        lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType')
    privacy_icon = get_first(
        (playlist_header_renderer, sidebar_renderer), selected_icon_path, expected_type=str)

    unlisted_in_microformat = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or header_privacy == 'PUBLIC'
            or privacy_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    # The badge is only consulted together with the header privacy; without a
    # header privacy the dropdown icon decides, and failing that it is unknown
    if header_privacy is not None:
        is_private = self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or header_privacy == 'PRIVATE'
    elif privacy_icon is not None:
        is_private = privacy_icon == 'PRIVACY_PRIVATE'
    else:
        is_private = None

    # Same precedence as above, with the microformat flag as the last resort
    if header_privacy is not None:
        is_unlisted = self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or header_privacy == 'UNLISTED'
    elif privacy_icon is not None:
        is_unlisted = privacy_icon == 'PRIVACY_UNLISTED'
    else:
        is_unlisted = unlisted_in_microformat  # None when the microformat does not say

    return self._availability(
        is_private=is_private,
        is_unlisted=is_unlisted,
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` value of type `expected_type`
    found among the playlist sidebar items of `data`, or None if there is none
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Re-request a playlist with unavailable videos (e.g. private videos, region blocked, etc.)
    Returns None without making a request when `data` has neither playlist metadata nor a playlist header
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return None

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
    query = {
        'params': 'wgYCCAA=',
        'browseId': f'VL{item_id}'
    }
    # Non-fatal: a failed reload is left for the caller to deal with
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')

4876

4877

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the `skip` extractor argument of YoutubeTabIE"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped

4880

4881

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage at `url` and extract its initial data, with retries.

    Network errors other than HTTP 403/429 and incomplete initial data are
    handed to the retry manager; any other extraction error or reported alert
    goes through _error_or_warning and ends the attempts.

    @returns (webpage, data); both start out as None and are returned as they
             stand when the attempts end early
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Retry network failures, except HTTP 403 and 429 responses
            retryable = isinstance(cause, network_exceptions) and not (
                isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429))
            if retryable:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when `ytcfg` is available or the session is not authenticated.
    Otherwise raises ExtractorError if `fatal` and 'authcheck' is not in the
    `skip` extractor argument of YoutubeTabIE, else only emits a warning.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if 'authcheck' not in skipped and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

4920

4921

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data and ytcfg for `url`.

    Unless the webpage is skipped, both are first taken from the webpage
    (`webpage_fatal` controls whether a webpage failure is fatal). If no data
    was obtained that way, it is requested through _extract_tab_endpoint.

    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)

        # Detect a redirect to the home page that was not explicitly requested
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)

    return data, ytcfg

4939

4940

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API endpoint and return
    the response of the 'browse' or 'next' endpoint it resolves to.

    If neither endpoint is present in the resolve response, raises
    ExtractorError when `fatal`; otherwise emits a warning and returns None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # Key in the resolve response -> API endpoint to query with its parameters
    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolved, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)

4957

4958

# Default `params` value that `_search_results` sends when the caller does not pass one
_SEARCH_PARAMS = None

4959

4960

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search for `query`, page by page.

    @param query: search query string
    @param params: value for the request's 'params' field; `_SEARCH_PARAMS`
                   is used when not given, and the field is omitted if falsy
    @param default_client: client passed on to the ytcfg download, header
                           generation and API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    payload = {'query': query}
    if params:
        payload['params'] = params

    # Paths at which the result items may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    top_level_keys = {path[0] for path in content_keys}
    check_get_keys = tuple(top_level_keys)

    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-item list handed to _extract_entries; its first item is read back
    # after each page as the continuation to merge into the next request
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        payload.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=payload,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        page = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4995

IE_DESC = 'YouTube Tabs'

4996

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

5005

(?P<not_channel>

5006

feed/|hashtag/|

5007

(?:playlist|watch)\?.*?\blist=

5008

)|

5009

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

5014

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

5015

}

5016

IE_NAME = 'youtube:tab'

5017

5018

_TESTS = [{

5019

'note': 'playlists, multipage',

5020

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

5021

'playlist_mincount': 94,

5022

'info_dict': {

5023

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5024

'title': 'Igor Kleiner - Playlists',

5025

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

5026

'uploader': 'Igor Kleiner',

5027

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5028

'channel': 'Igor Kleiner',

5029

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5030

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

5031

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5032

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5033

'channel_follower_count': int

5034

},

5035

}, {

5036

'note': 'playlists, multipage, different order',

5037

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

5038

'playlist_mincount': 94,

5039

'info_dict': {

5040

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5041

'title': 'Igor Kleiner - Playlists',

5042

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

5043

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5044

'uploader': 'Igor Kleiner',

5045

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5046

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

5047

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5048

'channel': 'Igor Kleiner',

5049

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5050

'channel_follower_count': int

5051

},

5052

}, {

5053

'note': 'playlists, series',

5054

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

5055

'playlist_mincount': 5,

5056

'info_dict': {

5057

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5058

'title': '3Blue1Brown - Playlists',

5059

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5060

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5061

'uploader': '3Blue1Brown',

5062

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5063

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5064

'channel': '3Blue1Brown',

5065

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5066

'tags': ['Mathematics'],

5067

'channel_follower_count': int

5068

},

5069

}, {

5070

'note': 'playlists, singlepage',

5071

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

5072

'playlist_mincount': 4,

5073

'info_dict': {

5074

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5075

'title': 'ThirstForScience - Playlists',

5076

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

5077

'uploader': 'ThirstForScience',

5078

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5079

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5080

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5081

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5082

'tags': 'count:13',

5083

'channel': 'ThirstForScience',

5084

'channel_follower_count': int

5085

}

5086

}, {

5087

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

5088

'only_matching': True,

5089

}, {

5090

'note': 'basic, single video playlist',

5091

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5092

'info_dict': {

5093

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5094

'uploader': 'Sergey M.',

5095

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5096

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

5101

'channel': 'Sergey M.',

5102

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5103

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5104

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5105

'availability': 'public',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

5110

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5111

'info_dict': {

5112

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5113

'uploader': 'Sergey M.',

5114

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5115

'title': 'youtube-dl empty playlist',

5116

'tags': [],

5117

'channel': 'Sergey M.',

5118

'description': '',

5119

'modified_date': '20160902',

5120

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5121

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5122

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5123

'availability': 'public',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

5129

'info_dict': {

5130

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5131

'title': 'lex will - Home',

5132

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5133

'uploader': 'lex will',

5134

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5135

'channel': 'lex will',

5136

'tags': ['bible', 'history', 'prophesy'],

5137

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5138

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5139

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5140

'channel_follower_count': int

5141

},

5142

'playlist_mincount': 2,

5143

}, {

5144

'note': 'Videos tab',

5145

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

5146

'info_dict': {

5147

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5148

'title': 'lex will - Videos',

5149

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5150

'uploader': 'lex will',

5151

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5152

'tags': ['bible', 'history', 'prophesy'],

5153

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5154

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5155

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5156

'channel': 'lex will',

5157

'channel_follower_count': int

5158

},

5159

'playlist_mincount': 975,

5160

}, {

5161

'note': 'Videos tab, sorted by popular',

5162

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

5163

'info_dict': {

5164

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5165

'title': 'lex will - Videos',

5166

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5167

'uploader': 'lex will',

5168

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5169

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5170

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5171

'channel': 'lex will',

5172

'tags': ['bible', 'history', 'prophesy'],

5173

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5174

'channel_follower_count': int

5175

},

5176

'playlist_mincount': 199,

5177

}, {

5178

'note': 'Playlists tab',

5179

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

5180

'info_dict': {

5181

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5182

'title': 'lex will - Playlists',

5183

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5184

'uploader': 'lex will',

5185

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5186

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5187

'channel': 'lex will',

5188

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5189

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5190

'tags': ['bible', 'history', 'prophesy'],

5191

'channel_follower_count': int

5192

},

5193

'playlist_mincount': 17,

5194

}, {

5195

'note': 'Community tab',

5196

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

5197

'info_dict': {

5198

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5199

'title': 'lex will - Community',

5200

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5201

'uploader': 'lex will',

5202

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5203

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5204

'channel': 'lex will',

5205

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5206

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5207

'tags': ['bible', 'history', 'prophesy'],

5208

'channel_follower_count': int

5209

},

5210

'playlist_mincount': 18,

5211

}, {

5212

'note': 'Channels tab',

5213

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

5214

'info_dict': {

5215

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5216

'title': 'lex will - Channels',

5217

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5218

'uploader': 'lex will',

5219

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5220

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5221

'channel': 'lex will',

5222

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5223

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5224

'tags': ['bible', 'history', 'prophesy'],

5225

'channel_follower_count': int

5226

},

5227

'playlist_mincount': 12,

5228

}, {

5229

'note': 'Search tab',

5230

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

5231

'playlist_mincount': 40,

5232

'info_dict': {

5233

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5234

'title': '3Blue1Brown - Search - linear algebra',

5235

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5236

'uploader': '3Blue1Brown',

5237

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5238

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5239

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5240

'tags': ['Mathematics'],

5241

'channel': '3Blue1Brown',

5242

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5243

'channel_follower_count': int

5244

},

5245

}, {

5246

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5247

'only_matching': True,

5248

}, {

5249

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5250

'only_matching': True,

5251

}, {

5252

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5253

'only_matching': True,

5254

}, {

5255

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

5256

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5257

'info_dict': {

5258

'title': '29C3: Not my department',

5259

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5260

'uploader': 'Christiaan008',

5261

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5262

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

5263

'tags': [],

5264

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5265

'view_count': int,

5266

'modified_date': '20150605',

5267

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5268

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5269

'channel': 'Christiaan008',

5270

'availability': 'public',

5271

},

5272

'playlist_count': 96,

5273

}, {

5274

'note': 'Large playlist',

5275

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

5276

'info_dict': {

5277

'title': 'Uploads from Cauchemar',

5278

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

5279

'uploader': 'Cauchemar',

5280

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5281

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

5282

'tags': [],

5283

'modified_date': r're:\d{8}',

5284

'channel': 'Cauchemar',

5285

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

5286

'view_count': int,

5287

'description': '',

5288

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5289

'availability': 'public',

5290

},

5291

'playlist_mincount': 1123,

5292

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5293

}, {

5294

'note': 'even larger playlist, 8832 videos',

5295

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

5296

'only_matching': True,

5297

}, {

5298

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

5299

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

5300

'info_dict': {

5301

'title': 'Uploads from Interstellar Movie',

5302

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

5303

'uploader': 'Interstellar Movie',

5304

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5305

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

5306

'tags': [],

5307

'view_count': int,

5308

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5309

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

5310

'channel': 'Interstellar Movie',

5311

'description': '',

5312

'modified_date': r're:\d{8}',

5313

'availability': 'public',

5314

},

5315

'playlist_mincount': 21,

5316

}, {

5317

'note': 'Playlist with "show unavailable videos" button',

5318

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

5319

'info_dict': {

5320

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

5321

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

5322

'uploader': 'Phim Siêu Nhân Nhật Bản',

5323

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5324

'view_count': int,

5325

'channel': 'Phim Siêu Nhân Nhật Bản',

5326

'tags': [],

5327

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5328

'description': '',

5329

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5330

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5331

'modified_date': r're:\d{8}',

5332

'availability': 'public',

5333

},

5334

'playlist_mincount': 200,

5335

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5336

}, {

5337

'note': 'Playlist with unavailable videos in page 7',

5338

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

5339

'info_dict': {

5340

'title': 'Uploads from BlankTV',

5341

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

5342

'uploader': 'BlankTV',

5343

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5344

'channel': 'BlankTV',

5345

'channel_url': 'https://www.youtube.com/c/blanktv',

5346

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5347

'view_count': int,

5348

'tags': [],

5349

'uploader_url': 'https://www.youtube.com/c/blanktv',

5350

'modified_date': r're:\d{8}',

5351

'description': '',

5352

'availability': 'public',

5353

},

5354

'playlist_mincount': 1000,

5355

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5356

}, {

5357

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

5358

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5359

'info_dict': {

5360

'title': 'Data Analysis with Dr Mike Pound',

5361

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5362

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5363

'uploader': 'Computerphile',

5364

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

5365

'uploader_url': 'https://www.youtube.com/user/Computerphile',

5366

'tags': [],

5367

'view_count': int,

5368

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5369

'channel_url': 'https://www.youtube.com/user/Computerphile',

5370

'channel': 'Computerphile',

5371

'availability': 'public',

5372

'modified_date': '20190712',

5373

},

5374

'playlist_mincount': 11,

5375

}, {

5376

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5377

'only_matching': True,

5378

}, {

5379

'note': 'Playlist URL that does not actually serve a playlist',

5380

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

5385

'uploader': 'STREEM',

5386

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

5387

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

5388

'upload_date': '20150526',

5389

'license': 'Standard YouTube License',

5390

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

5391

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5398

},

5399

'skip': 'This video is not available.',

5400

'add_ie': [YoutubeIE.ie_key()],

5401

}, {

5402

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5403

'only_matching': True,

5404

}, {

5405

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5406

'only_matching': True,

5407

}, {

5408

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5409

'info_dict': {

5410

'id': 'Wq15eF5vCbI', # This will keep changing

5411

'ext': 'mp4',

5412

'title': str,

5413

'uploader': 'Sky News',

5414

'uploader_id': 'skynews',

5415

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5416

'upload_date': r're:\d{8}',

5417

'description': str,

5418

'categories': ['News & Politics'],

5419

'tags': list,

5420

'like_count': int,

5421

'release_timestamp': int,

5422

'channel': 'Sky News',

5423

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5424

'age_limit': 0,

5425

'view_count': int,

5426

'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',

5427

'playable_in_embed': True,

5428

'release_date': r're:\d+',

5429

'availability': 'public',

5430

'live_status': 'is_live',

5431

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5432

'channel_follower_count': int,

5433

'concurrent_view_count': int,

5434

},

5435

'params': {

5436

'skip_download': True,

5437

},

5438

'expected_warnings': ['Ignoring subtitle tracks found in '],

5439

}, {

5440

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5445

'uploader': 'The Young Turks',

5446

'uploader_id': 'TheYoungTurks',

5447

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5448

'upload_date': '20150715',

5449

'license': 'Standard YouTube License',

5450

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5451

'categories': ['News & Politics'],

5452

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5457

},

5458

'only_matching': True,

5459

}, {

5460

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5461

'only_matching': True,

5462

}, {

5463

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5464

'only_matching': True,

5465

}, {

5466

'note': 'A channel that is not live. Should raise error',

5467

'url': 'https://www.youtube.com/user/numberphile/live',

5468

'only_matching': True,

5469

}, {

5470

'url': 'https://www.youtube.com/feed/trending',

5471

'only_matching': True,

5472

}, {

5473

'url': 'https://www.youtube.com/feed/library',

5474

'only_matching': True,

5475

}, {

5476

'url': 'https://www.youtube.com/feed/history',

5477

'only_matching': True,

5478

}, {

5479

'url': 'https://www.youtube.com/feed/subscriptions',

5480

'only_matching': True,

5481

}, {

5482

'url': 'https://www.youtube.com/feed/watch_later',

5483

'only_matching': True,

5484

}, {

5485

'note': 'Recommended - redirects to home page.',

5486

'url': 'https://www.youtube.com/feed/recommended',

5487

'only_matching': True,

5488

}, {

5489

'note': 'inline playlist with not always working continuations',

5490

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5491

'only_matching': True,

5492

}, {

5493

'url': 'https://www.youtube.com/course',

5494

'only_matching': True,

5495

}, {

5496

'url': 'https://www.youtube.com/zsecurity',

5497

'only_matching': True,

5498

}, {

5499

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5500

'only_matching': True,

5501

}, {

5502

'url': 'https://www.youtube.com/TheYoungTurks/live',

5503

'only_matching': True,

5504

}, {

5505

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 300, # not consistent but should be over 300

5512

}, {

5513

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5514

'only_matching': True,

5515

}, {

5516

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5517

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5518

'only_matching': True

5519

}, {

5520

'note': '/browse/ should redirect to /channel/',

5521

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5522

'only_matching': True

5523

}, {

5524

'note': 'VLPL, should redirect to playlist?list=PL...',

5525

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5526

'info_dict': {

5527

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5528

'uploader': 'NoCopyrightSounds',

5529

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5530

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5531

'title': 'NCS : All Releases 💿',

5532

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5533

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5534

'modified_date': r're:\d{8}',

5535

'view_count': int,

5536

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5537

'tags': [],

5538

'channel': 'NoCopyrightSounds',

5539

'availability': 'public',

5540

},

5541

'playlist_mincount': 166,

5542

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5543

}, {

5544

'note': 'Topic, should redirect to playlist?list=UU...',

5545

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5546

'info_dict': {

5547

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5548

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5549

'title': 'Uploads from Royalty Free Music - Topic',

5550

'uploader': 'Royalty Free Music - Topic',

5551

'tags': [],

5552

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5553

'channel': 'Royalty Free Music - Topic',

5554

'view_count': int,

5555

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5556

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5557

'modified_date': r're:\d{8}',

5558

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5559

'description': '',

5560

'availability': 'public',

5561

},

5562

'playlist_mincount': 101,

5563

}, {

5564

# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)

5565

# Treat as a general feed

5566

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5567

'info_dict': {

5568

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5569

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5570

'tags': [],

5571

},

5572

'playlist_mincount': 9,

5573

}, {

5574

'note': 'Youtube music Album',

5575

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5576

'info_dict': {

5577

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5578

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5583

'modified_date': r're:\d{8}',

5584

},

5585

'playlist_count': 50,

5586

}, {

5587

'note': 'unlisted single video playlist',

5588

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5589

'info_dict': {

5590

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5591

'uploader': 'colethedj',

5592

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5593

'title': 'yt-dlp unlisted playlist test',

5594

'availability': 'unlisted',

5595

'tags': [],

5596

'modified_date': '20220418',

5597

'channel': 'colethedj',

5598

'view_count': int,

5599

'description': '',

5600

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5601

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5602

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5607

'url': 'https://www.youtube.com/feed/recommended',

5608

'info_dict': {

5609

'id': 'recommended',

5610

'title': 'recommended',

5611

'tags': [],

5612

},

5613

'playlist_mincount': 50,

5614

'params': {

5615

'skip_download': True,

5616

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5617

},

5618

}, {

5619

'note': 'API Fallback: /videos tab, sorted by oldest first',

5620

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5621

'info_dict': {

5622

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5623

'title': 'Cody\'sLab - Videos',

5624

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5625

'uploader': 'Cody\'sLab',

5626

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5627

'channel': 'Cody\'sLab',

5628

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5629

'tags': [],

5630

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5631

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5632

'channel_follower_count': int

5633

},

5634

'playlist_mincount': 650,

5635

'params': {

5636

'skip_download': True,

5637

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5638

},

5639

'skip': 'Query for sorting no longer works',

5640

}, {

5641

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5642

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5643

'info_dict': {

5644

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5645

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5646

'title': 'Uploads from Royalty Free Music - Topic',

5647

'uploader': 'Royalty Free Music - Topic',

5648

'modified_date': r're:\d{8}',

5649

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5650

'description': '',

5651

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5652

'tags': [],

5653

'channel': 'Royalty Free Music - Topic',

5654

'view_count': int,

5655

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5656

'availability': 'public',

5657

},

5658

'playlist_mincount': 101,

5659

'params': {

5660

'skip_download': True,

5661

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5662

},

5663

}, {

5664

'note': 'non-standard redirect to regional channel',

5665

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5666

'only_matching': True

5667

}, {

5668

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5669

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5670

'info_dict': {

5671

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5672

'modified_date': '20220407',

5673

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5674

'tags': [],

5675

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5676

'uploader': 'pukkandan',

5677

'availability': 'unlisted',

5678

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5679

'channel': 'pukkandan',

5680

'description': 'Test for collaborative playlist',

5681

'title': 'yt-dlp test - collaborative playlist',

5682

'view_count': int,

5683

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5684

},

5685

'playlist_mincount': 2

5686

}, {

5687

'note': 'translated tab name',

5688

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',

5689

'info_dict': {

5690

'id': 'UCiu-3thuViMebBjw_5nWYrA',

5691

'tags': [],

5692

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5693

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5694

'description': 'test description',

5695

'title': 'cole-dlp-test-acc - 再生リスト',

5696

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5697

'uploader': 'cole-dlp-test-acc',

5698

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5699

'channel': 'cole-dlp-test-acc',

5700

'channel_follower_count': int,

5701

},

5702

'playlist_mincount': 1,

5703

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5704

'expected_warnings': ['Preferring "ja"'],

5705

}, {

5706

# XXX: this should really check flat playlist entries, but the test suite doesn't support that

5707

'note': 'preferred lang set with playlist with translated video titles',

5708

'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5709

'info_dict': {

5710

'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5711

'tags': [],

5712

'view_count': int,

5713

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5714

'uploader': 'cole-dlp-test-acc',

5715

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5716

'channel': 'cole-dlp-test-acc',

5717

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5718

'description': 'test',

5719

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5720

'title': 'dlp test playlist',

5721

'availability': 'public',

5722

},

5723

'playlist_mincount': 1,

5724

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5725

'expected_warnings': ['Preferring "ja"'],

5726

}, {

5727

# shorts audio pivot for 2GtVksBMYFM.

5728

'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',

5729

'info_dict': {

5730

'id': 'sfv_audio_pivot',

5731

'title': 'sfv_audio_pivot',

5732

'tags': [],

5733

},

5734

'playlist_mincount': 50,

5735

5736

}, {

5737

# Channel with a real live tab (not to be mistaken with streams tab)

5738

# Do not treat like it should redirect to live stream

5739

'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',

5740

'info_dict': {

5741

'id': 'UCEH7P7kyJIkS_gJf93VYbmg',

5742

'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',

5743

'tags': [],

5744

},

5745

'playlist_mincount': 20,

5746

}, {

5747

# Tab name is not the same as tab id

5748

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',

5749

'info_dict': {

5750

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

5751

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',

5752

'tags': [],

5753

},

5754

'playlist_mincount': 8,

5755

}, {

5756

# Home tab id is literally home. Not to get mistaken with featured

5757

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',

5758

'info_dict': {

5759

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

5760

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',

5761

'tags': [],

5762

},

5763

'playlist_mincount': 8,

5764

}, {

5765

# Should get three playlists for videos, shorts and streams tabs

5766

'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5767

'info_dict': {

5768

'id': 'UCK9V2B22uJYu3N7eR_BT9QA',

5769

'title': 'Polka Ch. 尾丸ポルカ',

5770

'channel_follower_count': int,

5771

'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',

5772

'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5773

'uploader': 'Polka Ch. 尾丸ポルカ',

5774

'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9',

5775

'channel': 'Polka Ch. 尾丸ポルカ',

5776

'tags': 'count:35',

5777

'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5778

'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA',

},

'playlist_count': 3,

}, {

# Shorts tab with channel with handle

5783

'url': 'https://www.youtube.com/@NotJustBikes/shorts',

5784

'info_dict': {

5785

'id': 'UC0intLFzLaudFG-xAvUEO-A',

5786

'title': 'Not Just Bikes - Shorts',

5787

'tags': 'count:12',

5788

'uploader': 'Not Just Bikes',

5789

'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',

5790

'description': 'md5:7513148b1f02b924783157d84c4ea555',

5791

'channel_follower_count': int,

5792

'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',

5793

'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',

5794

'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',

5795

'channel': 'Not Just Bikes',

5796

},

5797

'playlist_mincount': 10,

5798

}, {

5799

# Streams tab

5800

'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',

5801

'info_dict': {

5802

'id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5803

'title': '中村悠一 - Live',

5804

'tags': 'count:7',

5805

'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5806

'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',

5807

'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5808

'channel': '中村悠一',

5809

'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',

5810

'channel_follower_count': int,

5811

'uploader': '中村悠一',

5812

'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',

5813

},

5814

'playlist_mincount': 60,

5815

}, {

5816

# Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.

5817

# See test_youtube_lists

5818

'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',

5819

'only_matching': True,

5820

}, {

5821

# No uploads and no UCID given. Should fail with no uploads error

5822

# See test_youtube_lists

5823

'url': 'https://www.youtube.com/news',

5824

'only_matching': True

5825

}, {

5826

# No videos tab but has a shorts tab

5827

'url': 'https://www.youtube.com/c/TKFShorts',

5828

'info_dict': {

5829

'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

5830

'title': 'Shorts Break - Shorts',

5831

'tags': 'count:32',

5832

'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

5833

'channel': 'Shorts Break',

5834

'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',

5835

'uploader': 'Shorts Break',

5836

'channel_follower_count': int,

5837

'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

5838

'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',

5839

'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',

5840

},

5841

'playlist_mincount': 30,

5842

}, {

5843

# Trending Now Tab. tab id is empty

5844

'url': 'https://www.youtube.com/feed/trending',

5845

'info_dict': {

5846

'id': 'trending',

5847

'title': 'trending - Now',

5848

'tags': [],

5849

},

5850

'playlist_mincount': 30,

5851

}, {

5852

# Trending Gaming Tab. tab id is empty

5853

'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',

5854

'info_dict': {

5855

'id': 'trending',

5856

'title': 'trending - Gaming',

5857

'tags': [],

5858

},

5859

'playlist_mincount': 30,

5860

}, {

5861

# Shorts url result in shorts tab

5862

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',

5863

'info_dict': {

5864

'id': 'UCiu-3thuViMebBjw_5nWYrA',

5865

'title': 'cole-dlp-test-acc - Shorts',

5866

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5867

'channel': 'cole-dlp-test-acc',

5868

'channel_follower_count': int,

5869

'description': 'test description',

5870

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5871

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5872

'tags': [],

5873

'uploader': 'cole-dlp-test-acc',

5874

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'Youtube',

'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',

5882

'id': 'sSM9J5YH_60',

5883

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5884

'title': 'SHORT short',

5885

'channel': 'cole-dlp-test-acc',

5886

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

'view_count': int,

'thumbnails': list,

}

}],

'params': {'extract_flat': True},

5892

}, {

5893

# Live video status should be extracted

5894

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',

5895

'info_dict': {

5896

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

5897

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live

'tags': []

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'Youtube',

'url': 'startswith:https://www.youtube.com/watch?v=',

5905

'id': str,

5906

'title': str,

5907

'live_status': 'is_live',

5908

'channel_id': str,

5909

'channel_url': str,

5910

'concurrent_view_count': int,

'channel': str,

}

}],

'params': {'extract_flat': True},

5915

'playlist_mincount': 1

}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle `url`.

    URLs that YoutubeIE already accepts are always rejected here; every
    other URL is judged by the inherited `suitable` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5921

5922

# Splits a URL into three named parts: `pre` (whatever _VALID_URL matches), an
# optional `tab` path segment (a "/" followed by characters other than ?, # and /)
# and `post` (the remainder of the URL). The conditional `(?(not_channel)|...)`
# only tries to match `tab` when the `not_channel` group did not participate.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')

5923

5924

def _get_url_mobj(self, url):

5925

mobj = self._URL_RE.match(url).groupdict()

5926

mobj.update((k, '') for k, v in mobj.items() if v is None)

5927

return mobj

5928

5929

def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a `(tab_id, tab_name)` tuple for a tab renderer dict.

    The id is taken from the tab segment of the tab's endpoint URL (resolved
    against `base_url`), then from the `tabIdentifier` field, and finally
    falls back to the lowercased tab title.
    """
    name = (tab.get('title') or '').lower()
    endpoint_url = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    absolute_url = urljoin(base_url, endpoint_url)

    identifier = None
    if absolute_url:
        # Drop the leading "/" of the matched tab segment
        identifier = self._get_url_mobj(absolute_url)['tab'][1:]
    if not identifier:
        identifier = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if identifier:
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(identifier, identifier), name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if name:
        self.write_debug(f'Falling back to selected tab name: {name}')
    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(name, name), name

5950

5951

def _has_tab(self, tabs, tab_id):

5952

return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)

5953

5954

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract the page behind `url`.

    Depending on the URL and the downloaded data this returns:
      * a `url_result` pointing at another URL (music VL/MP/browse URLs,
        watch URLs without a video id, conditional redirects, single videos),
      * the result of a single tab,
      * a playlist whose entries are the per-tab results when additional
        tabs (streams/shorts) were collected, or
      * the result of an inline playlist found in the watch page data.

    Raises ExtractorError when the URL/page cannot be resolved (see the
    individual messages below) and UserNotLive when a `/live` tab was
    requested but a different tab was selected.
    """
    item_id = self._match_id(url)
    # Force the host to www.youtube.com, keeping the rest of the URL
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    # `not_channel` is an empty string when that group did not match
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` still carries its leading "/" (or is empty); strip it for the id
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # No tab requested for a channel URL: request the /videos tab
        url = f'{pre}/videos{post}'

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    # Hand over to YoutubeIE when only the video should be downloaded
    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Re-attach the originally requested tab and trailing part to the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # `extra_tabs` collects URLs of further tabs to be extracted alongside the selected one
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    raise ExtractorError('This channel has no uploads', expected=True)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    raise ExtractorError('This channel has no uploads', expected=True)
                else:
                    # From here on the uploads playlist replaces the channel page
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                # The already downloaded data is discarded; only `extra_tabs` are used below
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so the tab renderers are extracted again
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        # More than one tab result: wrap them in an outer playlist
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Prefer the video id reported by the page over the one from the query string
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')

6103

6104

6105

class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids as well as URLs carrying a `list=` parameter and
    # hands them over to YoutubeTabIE as a canonical /playlist URL.
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/c/WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/c/WickmanVT',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles or that carry a video id (`v=`)."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): the function-local import comes from the original code;
        # presumably it keeps this method self-contained - confirm before removing.
        from ..utils import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        return False if video_ids[0] else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input into a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one, otherwise build it from the id
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6217

6218

6219

class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist id (`list=`)
    IE_DESC = 'youtu.be'
    _VALID_URL = (
        r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)'
        % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short link into the equivalent watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6268

6269

6270

class YoutubeLivestreamEmbedIE(InfoExtractor):
    # Maps /embed/live_stream?channel=<id> URLs onto the channel's /live page.
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's live URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

6283

6284

6285

class YoutubeYtUserIE(InfoExtractor):
    # Expands the "ytuser:<name>" shorthand into the matching /user/ URL.
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [
        {
            'url': 'ytuser:phihag',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's page URL."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)

6297

6298

6299

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Resolves the ":ytfav" keyword family to the "LL" playlist.
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [
        {
            'url': ':ytfav',
            'only_matching': True,
        },
        {
            'url': ':ytfavorites',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed liked-videos playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

6316

6317

6318

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    # Exposes the entries of the notification menu as a playlist.
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [
        {
            'url': ':ytnotif',
            'only_matching': True,
        },
        {
            'url': ':ytnotifications',
            'only_matching': True,
        },
    ]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield entries for one menu response.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last
        ``continuationItemRenderer`` found among the items, if any.
        """
        # Items live in a different place on the first page and on continuations
        first_page_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer',
            'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
        continuation_page_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        menu_items = traverse_obj(
            response, first_page_path, continuation_page_path, expected_type=list) or []

        continuation_list[0] = None
        for menu_item in menu_items:
            result = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if result:
                yield result
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Build a ``url`` result from a single notification renderer.

        Returns None when the notification points neither to a video nor to a
        community post with both a channel id and a post id.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            canonical_base_url = traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', canonical_base_url, 'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The sent time is only parsed when the approximate_date argument is set
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from successive menu pages until no continuation is left."""
        continuation_list = [None]
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data comes from the previous response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a playlist with id and title 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)

6407

6408

6409

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearch" keyword search, restricted to videos via _SEARCH_PARAMS.
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [
        {
            'url': 'ytsearch5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            }
        },
    ]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # "ytsearchdate" keyword search; the name is derived from YoutubeSearchIE's.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [
        {
            'url': 'ytsearchdate5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            }
        },
    ]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles /results and /search URLs; the optional "sp" query parameter is
    # forwarded to the search as its params.
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            }
        },
        {
            'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'python',
                'title': 'python',
            }
        },
        {
            'url': 'https://www.youtube.com/results?search_query=%23cats',
            'playlist_mincount': 1,
            'info_dict': {
                'id': '#cats',
                'title': '#cats',
                # The test suite does not have support for nested playlists
                # 'entries': [{
                #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                #     'title': '#cats',
                # }],
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Run the search named in the URL and return it as a playlist.

        The query text doubles as the playlist id and title.
        """
        url_params = parse_qs(url)
        # "search_query" takes precedence over "q" when both are present
        query_values = url_params.get('search_query') or url_params.get('q')
        query = query_values[0]
        search_params = url_params.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, search_params), query, query)

6478

6479

6480

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles music.youtube.com search URLs.  A result section can be chosen
    # either through the "sp" query parameter or through the URL fragment.
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music',
            'playlist_count': 16,
            'info_dict': {
                'id': 'royalty free music',
                'title': 'royalty free music',
            }
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - songs',
                'title': 'royalty free music - songs',
            },
            'params': {'extract_flat': 'in_playlist'}
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - community playlists',
                'title': 'royalty free music - community playlists',
            },
            'params': {'extract_flat': 'in_playlist'}
        },
    ]

    # Section name (as written in the URL fragment) -> search params value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run the music search named in the URL and return it as a playlist.

        The playlist id and title are "<query> - <section>" when a section
        is in effect and just the query otherwise.
        """
        url_params = parse_qs(url)
        query_values = url_params.get('search_query') or url_params.get('q')
        query = query_values[0]
        params = url_params.get('sp', (None,))[0]

        if params:
            # Prefer the section's name; fall back to the raw params value
            section = params
            for name, value in self._SECTIONS.items():
                if value == params:
                    section = name
                    break
        else:
            # Only the text between the first and second '#' is considered
            fragment = url.split('#')[1] if '#' in url else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)

6531

6532

6533

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Base class for feed extractors
    Subclasses must re-define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        # Borrow the login check from YoutubeBaseInfoExtractor; this class
        # does not inherit from it
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(self):
        """Extractor name built from the feed name."""
        return 'youtube:{}'.format(self._FEED_NAME)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the URL of this feed."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

6551

6552

6553

class YoutubeWatchLaterIE(InfoExtractor):
    # Resolves the ":ytwatchlater" keyword to the "WL" playlist.
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed watch-later playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

6565

6566

6567

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home page as well as the ":ytrec" keyword.
    # Unlike the base feed class, login is not required here.
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # ":ytsubs" keyword family -> the "subscriptions" feed.
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # ":ythis" / ":ythistory" keyword -> the "history" feed.
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]

class YoutubeStoriesIE(InfoExtractor):
    # Expands "ytstories:UC<id>" into an "RLTD<id>" playlist URL.
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {
            'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE, smuggling ``is_story`` along with the playlist URL."""
        # The matched id excludes the leading "UC" of the channel id
        playlist_id = 'RLTD{}'.format(self._match_id(url))
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        story_url = smuggle_url(playlist_url, {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)

6621

6622

6623

class YoutubeShortsAudioPivotIE(InfoExtractor):
    # Maps /source/<video id>/shorts URLs onto the sfv_audio_pivot feed.
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    IE_NAME = 'youtube:shorts:pivot:audio'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Generates sfv_audio_pivot browse params for this video id
        """
        encoded_id = video_id.encode()
        # The id is embedded three times in the byte template, which is then
        # base64-encoded and URL-quoted
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (encoded_id, encoded_id, encoded_id)
        b64_params = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(b64_params)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the audio pivot feed URL for the video."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        feed_url = f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}'
        return self.url_result(feed_url, ie=YoutubeTabIE)

class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution_link URLs whose query string ends before any
    # video id, and explains the likely cause to the user.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''
    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with a hint about quoting the URL.

        Raises:
            ExtractorError: unconditionally, marked as expected.
        """
        # The hint names the yt-dlp command; the previous text told users to
        # run youtube-dl.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    # Resolves /clip/<id> pages to the underlying video plus the clip's
    # start/end offsets.
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the clip's base video.

        The result carries the clip id and the clip's start/end offsets in
        seconds.

        Raises:
            ExtractorError: if the video id, the clip data, or the clip's
                start/end times cannot be obtained from the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        # The traversal yields None when the path is absent; fail with a clear
        # extractor error rather than a TypeError on subscripting None.
        if not clip_data:
            raise ExtractorError('Unable to find clip data')

        try:
            start_ms = int(clip_data['startTimeMs'])
            end_ms = int(clip_data['endTimeMs'])
        except (KeyError, TypeError, ValueError) as err:
            raise ExtractorError('Unable to parse clip start/end time') from err

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Times are given in milliseconds; sections are in seconds
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }

class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose "v" value is only 1-10 characters long.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always fail, reporting the incomplete id as an expected error."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)