jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import enum
	6	import hashlib
	7	import itertools
	8	import json
	9	import math
	10	import os.path
	11	import random
	12	import re
	13	import sys
	14	import threading
	15	import time
	16	import traceback
	17	import urllib.error
	18	import urllib.parse
	19
	20	from .common import InfoExtractor, SearchInfoExtractor
	21	from .openload import PhantomJSwrapper
	22	from ..compat import functools
	23	from ..jsinterp import JSInterpreter
	24	from ..utils import (
	25	NO_DEFAULT,
	26	ExtractorError,
	27	LazyList,
	28	UserNotLive,
	29	bug_reports_message,
	30	classproperty,
	31	clean_html,
	32	datetime_from_str,
	33	dict_get,
	34	filter_dict,
	35	float_or_none,
	36	format_field,
	37	get_first,
	38	int_or_none,
	39	is_html,
	40	join_nonempty,
	41	js_to_json,
	42	mimetype2ext,
	43	network_exceptions,
	44	orderedSet,
	45	parse_codecs,
	46	parse_count,
	47	parse_duration,
	48	parse_iso8601,
	49	parse_qs,
	50	qualities,
	51	remove_start,
	52	smuggle_url,
	53	str_or_none,
	54	str_to_int,
	55	strftime_or_none,
	56	traverse_obj,
	57	try_get,
	58	unescapeHTML,
	59	unified_strdate,
	60	unified_timestamp,
	61	unsmuggle_url,
	62	update_url_query,
	63	url_or_none,
	64	urljoin,
	65	variadic,
	66	)
	67
	68	# any clients starting with _ cannot be explicitly requested by the user
	69	INNERTUBE_CLIENTS = {
	70	'web': {
	71	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	72	'INNERTUBE_CONTEXT': {
	73	'client': {
	74	'clientName': 'WEB',
	75	'clientVersion': '2.20220801.00.00',
	76	}
	77	},
	78	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	79	},
	80	'web_embedded': {
	81	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	82	'INNERTUBE_CONTEXT': {
	83	'client': {
	84	'clientName': 'WEB_EMBEDDED_PLAYER',
	85	'clientVersion': '1.20220731.00.00',
	86	},
	87	},
	88	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	89	},
	90	'web_music': {
	91	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	92	'INNERTUBE_HOST': 'music.youtube.com',
	93	'INNERTUBE_CONTEXT': {
	94	'client': {
	95	'clientName': 'WEB_REMIX',
	96	'clientVersion': '1.20220727.01.00',
	97	}
	98	},
	99	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	100	},
	101	'web_creator': {
	102	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	103	'INNERTUBE_CONTEXT': {
	104	'client': {
	105	'clientName': 'WEB_CREATOR',
	106	'clientVersion': '1.20220726.00.00',
	107	}
	108	},
	109	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	110	},
	111	'android': {
	112	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	113	'INNERTUBE_CONTEXT': {
	114	'client': {
	115	'clientName': 'ANDROID',
	116	'clientVersion': '17.31.35',
	117	'androidSdkVersion': 30,
	118	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	119	}
	120	},
	121	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	122	'REQUIRE_JS_PLAYER': False
	123	},
	124	'android_embedded': {
	125	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	126	'INNERTUBE_CONTEXT': {
	127	'client': {
	128	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	129	'clientVersion': '17.31.35',
	130	'androidSdkVersion': 30,
	131	'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
	132	},
	133	},
	134	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	135	'REQUIRE_JS_PLAYER': False
	136	},
	137	'android_music': {
	138	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	139	'INNERTUBE_CONTEXT': {
	140	'client': {
	141	'clientName': 'ANDROID_MUSIC',
	142	'clientVersion': '5.16.51',
	143	'androidSdkVersion': 30,
	144	'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
	145	}
	146	},
	147	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	148	'REQUIRE_JS_PLAYER': False
	149	},
	150	'android_creator': {
	151	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	152	'INNERTUBE_CONTEXT': {
	153	'client': {
	154	'clientName': 'ANDROID_CREATOR',
	155	'clientVersion': '22.30.100',
	156	'androidSdkVersion': 30,
	157	'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
	158	},
	159	},
	160	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	161	'REQUIRE_JS_PLAYER': False
	162	},
	163	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	164	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	165	'ios': {
	166	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	167	'INNERTUBE_CONTEXT': {
	168	'client': {
	169	'clientName': 'IOS',
	170	'clientVersion': '17.33.2',
	171	'deviceModel': 'iPhone14,3',
	172	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	173	}
	174	},
	175	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	176	'REQUIRE_JS_PLAYER': False
	177	},
	178	'ios_embedded': {
	179	'INNERTUBE_CONTEXT': {
	180	'client': {
	181	'clientName': 'IOS_MESSAGES_EXTENSION',
	182	'clientVersion': '17.33.2',
	183	'deviceModel': 'iPhone14,3',
	184	'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	185	},
	186	},
	187	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	188	'REQUIRE_JS_PLAYER': False
	189	},
	190	'ios_music': {
	191	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	192	'INNERTUBE_CONTEXT': {
	193	'client': {
	194	'clientName': 'IOS_MUSIC',
	195	'clientVersion': '5.21',
	196	'deviceModel': 'iPhone14,3',
	197	'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	198	},
	199	},
	200	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	201	'REQUIRE_JS_PLAYER': False
	202	},
	203	'ios_creator': {
	204	'INNERTUBE_CONTEXT': {
	205	'client': {
	206	'clientName': 'IOS_CREATOR',
	207	'clientVersion': '22.33.101',
	208	'deviceModel': 'iPhone14,3',
	209	'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
	210	},
	211	},
	212	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	213	'REQUIRE_JS_PLAYER': False
	214	},
	215	# mweb has 'ultralow' formats
	216	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	217	'mweb': {
	218	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	219	'INNERTUBE_CONTEXT': {
	220	'client': {
	221	'clientName': 'MWEB',
	222	'clientVersion': '2.20220801.00.00',
	223	}
	224	},
	225	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	226	},
	227	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	228	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	229	'tv_embedded': {
	230	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	231	'INNERTUBE_CONTEXT': {
	232	'client': {
	233	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	234	'clientVersion': '2.0',
	235	},
	236	},
	237	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	238	},
	239	}
	240
	241
	242	def _split_innertube_client(client_name):
	243	variant, *base = client_name.rsplit('.', 1)
	244	if base:
	245	return variant, base[0], variant
	246	base, *variant = client_name.split('_', 1)
	247	return client_name, base, variant[0] if variant else None
	248
	249
	250	def build_innertube_clients():
	251	THIRD_PARTY = {
	252	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	253	}
	254	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	255	priority = qualities(BASE_CLIENTS[::-1])
	256
	257	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	258	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	259	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	260	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	261	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	262
	263	_, base_client, variant = _split_innertube_client(client)
	264	ytcfg['priority'] = 10 * priority(base_client)
	265
	266	if not variant:
	267	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	268	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	269	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	270	embedscreen['priority'] -= 3
	271	elif variant == 'embedded':
	272	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	273	ytcfg['priority'] -= 2
	274	else:
	275	ytcfg['priority'] -= 3
	276
	277
	278	build_innertube_clients()
	279
	280
	281	class BadgeType(enum.Enum):
	282	AVAILABILITY_UNLISTED = enum.auto()
	283	AVAILABILITY_PRIVATE = enum.auto()
	284	AVAILABILITY_PUBLIC = enum.auto()
	285	AVAILABILITY_PREMIUM = enum.auto()
	286	AVAILABILITY_SUBSCRIPTION = enum.auto()
	287	LIVE_NOW = enum.auto()
	288
	289
	290	class YoutubeBaseInfoExtractor(InfoExtractor):
	291	"""Provide base functions for Youtube extractors"""
	292
	293	_RESERVED_NAMES = (
	294	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	295	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	296	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|source\|'
	297	r'storefront\|oops\|index\|account\|t/terms\|about\|upload\|signin\|logout')
	298
	299	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	300
	301	# _NETRC_MACHINE = 'youtube'
	302
	303	# If True it will raise an error if no login info is provided
	304	_LOGIN_REQUIRED = False
	305
	306	_INVIDIOUS_SITES = (
	307	# invidious-redirect websites
	308	r'(?:www\.)?redirect\.invidious\.io',
	309	r'(?:(?:www\|dev)\.)?invidio\.us',
	310	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	311	r'(?:www\.)?invidious\.pussthecat\.org',
	312	r'(?:www\.)?invidious\.zee\.li',
	313	r'(?:www\.)?invidious\.ethibox\.fr',
	314	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	315	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	316	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	317	# youtube-dl invidious instances list
	318	r'(?:(?:www\|no)\.)?invidiou\.sh',
	319	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	320	r'(?:www\.)?invidious\.kabi\.tk',
	321	r'(?:www\.)?invidious\.mastodon\.host',
	322	r'(?:www\.)?invidious\.zapashcanon\.fr',
	323	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	324	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	325	r'(?:www\.)?invidious\.himiko\.cloud',
	326	r'(?:www\.)?invidious\.reallyancient\.tech',
	327	r'(?:www\.)?invidious\.tube',
	328	r'(?:www\.)?invidiou\.site',
	329	r'(?:www\.)?invidious\.site',
	330	r'(?:www\.)?invidious\.xyz',
	331	r'(?:www\.)?invidious\.nixnet\.xyz',
	332	r'(?:www\.)?invidious\.048596\.xyz',
	333	r'(?:www\.)?invidious\.drycat\.fr',
	334	r'(?:www\.)?inv\.skyn3t\.in',
	335	r'(?:www\.)?tube\.poal\.co',
	336	r'(?:www\.)?tube\.connect\.cafe',
	337	r'(?:www\.)?vid\.wxzm\.sx',
	338	r'(?:www\.)?vid\.mint\.lgbt',
	339	r'(?:www\.)?vid\.puffyan\.us',
	340	r'(?:www\.)?yewtu\.be',
	341	r'(?:www\.)?yt\.elukerio\.org',
	342	r'(?:www\.)?yt\.lelux\.fi',
	343	r'(?:www\.)?invidious\.ggc-project\.de',
	344	r'(?:www\.)?yt\.maisputain\.ovh',
	345	r'(?:www\.)?ytprivate\.com',
	346	r'(?:www\.)?invidious\.13ad\.de',
	347	r'(?:www\.)?invidious\.toot\.koeln',
	348	r'(?:www\.)?invidious\.fdn\.fr',
	349	r'(?:www\.)?watch\.nettohikari\.com',
	350	r'(?:www\.)?invidious\.namazso\.eu',
	351	r'(?:www\.)?invidious\.silkky\.cloud',
	352	r'(?:www\.)?invidious\.exonip\.de',
	353	r'(?:www\.)?invidious\.riverside\.rocks',
	354	r'(?:www\.)?invidious\.blamefran\.net',
	355	r'(?:www\.)?invidious\.moomoo\.de',
	356	r'(?:www\.)?ytb\.trom\.tf',
	357	r'(?:www\.)?yt\.cyberhost\.uk',
	358	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	359	r'(?:www\.)?qklhadlycap4cnod\.onion',
	360	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	361	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	362	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	363	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	364	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	365	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	366	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	367	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	368	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	369	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	370	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	371	r'(?:www\.)?piped\.kavin\.rocks',
	372	r'(?:www\.)?piped\.tokhmi\.xyz',
	373	r'(?:www\.)?piped\.syncpundit\.io',
	374	r'(?:www\.)?piped\.mha\.fi',
	375	r'(?:www\.)?watch\.whatever\.social',
	376	r'(?:www\.)?piped\.garudalinux\.org',
	377	r'(?:www\.)?piped\.rivo\.lol',
	378	r'(?:www\.)?piped-libre\.kavin\.rocks',
	379	r'(?:www\.)?yt\.jae\.fi',
	380	r'(?:www\.)?piped\.mint\.lgbt',
	381	r'(?:www\.)?il\.ax',
	382	r'(?:www\.)?piped\.esmailelbob\.xyz',
	383	r'(?:www\.)?piped\.projectsegfau\.lt',
	384	r'(?:www\.)?piped\.privacydev\.net',
	385	r'(?:www\.)?piped\.palveluntarjoaja\.eu',
	386	r'(?:www\.)?piped\.smnz\.de',
	387	r'(?:www\.)?piped\.adminforge\.de',
	388	r'(?:www\.)?watch\.whatevertinfoil\.de',
	389	r'(?:www\.)?piped\.qdi\.fi',
	390	)
	391
	392	# extracted from account/account_menu ep
	393	# XXX: These are the supported YouTube UI and API languages,
	394	# which is slightly different from languages supported for translation in YouTube studio
	395	_SUPPORTED_LANG_CODES = [
	396	'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
	397	'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
	398	'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
	399	'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
	400	'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
	401	'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
	402	]
	403
	404	_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
	405
	406	@functools.cached_property
	407	def _preferred_lang(self):
	408	"""
	409	Returns a language code supported by YouTube for the user preferred language.
	410	Returns None if no preferred language set.
	411	"""
	412	preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
	413	if not preferred_lang:
	414	return
	415	if preferred_lang not in self._SUPPORTED_LANG_CODES:
	416	raise ExtractorError(
	417	f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
	418	expected=True)
	419	elif preferred_lang != 'en':
	420	self.report_warning(
	421	f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
	422	return preferred_lang
	423
	424	def _initialize_consent(self):
	425	cookies = self._get_cookies('https://www.youtube.com/')
	426	if cookies.get('__Secure-3PSID'):
	427	return
	428	consent_id = None
	429	consent = cookies.get('CONSENT')
	430	if consent:
	431	if 'YES' in consent.value:
	432	return
	433	consent_id = self._search_regex(
	434	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	435	if not consent_id:
	436	consent_id = random.randint(100, 999)
	437	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	438
	439	def _initialize_pref(self):
	440	cookies = self._get_cookies('https://www.youtube.com/')
	441	pref_cookie = cookies.get('PREF')
	442	pref = {}
	443	if pref_cookie:
	444	try:
	445	pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
	446	except ValueError:
	447	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	448	pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
	449	self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
	450
	451	def _real_initialize(self):
	452	self._initialize_pref()
	453	self._initialize_consent()
	454	self._check_login_required()
	455
	456	def _check_login_required(self):
	457	if self._LOGIN_REQUIRED and not self._cookies_passed:
	458	self.raise_login_required('Login details are needed to download this content', method='cookies')
	459
	460	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	461	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	462
	463	def _get_default_ytcfg(self, client='web'):
	464	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	465
	466	def _get_innertube_host(self, client='web'):
	467	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	468
	469	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	470	# try_get but with fallback to default ytcfg client values when present
	471	_func = lambda y: try_get(y, getter, expected_type)
	472	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	473
	474	def _extract_client_name(self, ytcfg, default_client='web'):
	475	return self._ytcfg_get_safe(
	476	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	477	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
	478
	479	def _extract_client_version(self, ytcfg, default_client='web'):
	480	return self._ytcfg_get_safe(
	481	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	482	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
	483
	484	def _select_api_hostname(self, req_api_hostname, default_client=None):
	485	return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
	486	or req_api_hostname or self._get_innertube_host(default_client or 'web'))
	487
	488	def _extract_api_key(self, ytcfg=None, default_client='web'):
	489	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
	490
	491	def _extract_context(self, ytcfg=None, default_client='web'):
	492	context = get_first(
	493	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	494	# Enforce language and tz for extraction
	495	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	496	client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	497	return context
	498
	499	_SAPISID = None
	500

1

import base64

import calendar

import copy

import datetime

import enum

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

21

from .openload import PhantomJSwrapper

22

from ..compat import functools

23

from ..jsinterp import JSInterpreter

24

from ..utils import (

NO_DEFAULT,

ExtractorError,

LazyList,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

filter_dict,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

69

INNERTUBE_CLIENTS = {

70

'web': {

71

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

72

'INNERTUBE_CONTEXT': {

73

'client': {

74

'clientName': 'WEB',

75

'clientVersion': '2.20220801.00.00',

76

}

77

},

78

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

79

},

80

'web_embedded': {

81

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

82

'INNERTUBE_CONTEXT': {

83

'client': {

84

'clientName': 'WEB_EMBEDDED_PLAYER',

85

'clientVersion': '1.20220731.00.00',

86

},

87

},

88

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

89

},

90

'web_music': {

91

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

92

'INNERTUBE_HOST': 'music.youtube.com',

93

'INNERTUBE_CONTEXT': {

94

'client': {

95

'clientName': 'WEB_REMIX',

96

'clientVersion': '1.20220727.01.00',

97

}

98

},

99

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

100

},

101

'web_creator': {

102

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

103

'INNERTUBE_CONTEXT': {

104

'client': {

105

'clientName': 'WEB_CREATOR',

106

'clientVersion': '1.20220726.00.00',

107

}

108

},

109

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

110

},

111

'android': {

112

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

113

'INNERTUBE_CONTEXT': {

114

'client': {

115

'clientName': 'ANDROID',

116

'clientVersion': '17.31.35',

117

'androidSdkVersion': 30,

118

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

119

}

120

},

121

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

122

'REQUIRE_JS_PLAYER': False

123

},

124

'android_embedded': {

125

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

126

'INNERTUBE_CONTEXT': {

127

'client': {

128

'clientName': 'ANDROID_EMBEDDED_PLAYER',

129

'clientVersion': '17.31.35',

130

'androidSdkVersion': 30,

131

'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'

132

},

133

},

134

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

135

'REQUIRE_JS_PLAYER': False

136

},

137

'android_music': {

138

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

139

'INNERTUBE_CONTEXT': {

140

'client': {

141

'clientName': 'ANDROID_MUSIC',

142

'clientVersion': '5.16.51',

143

'androidSdkVersion': 30,

144

'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'

145

}

146

},

147

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

148

'REQUIRE_JS_PLAYER': False

149

},

150

'android_creator': {

151

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

152

'INNERTUBE_CONTEXT': {

153

'client': {

154

'clientName': 'ANDROID_CREATOR',

155

'clientVersion': '22.30.100',

156

'androidSdkVersion': 30,

157

'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'

158

},

159

},

160

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

161

'REQUIRE_JS_PLAYER': False

162

},

163

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

164

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

165

'ios': {

166

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

167

'INNERTUBE_CONTEXT': {

168

'client': {

169

'clientName': 'IOS',

170

'clientVersion': '17.33.2',

171

'deviceModel': 'iPhone14,3',

172

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

173

}

174

},

175

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

176

'REQUIRE_JS_PLAYER': False

177

},

178

'ios_embedded': {

179

'INNERTUBE_CONTEXT': {

180

'client': {

181

'clientName': 'IOS_MESSAGES_EXTENSION',

182

'clientVersion': '17.33.2',

183

'deviceModel': 'iPhone14,3',

184

'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

185

},

186

},

187

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

188

'REQUIRE_JS_PLAYER': False

189

},

190

'ios_music': {

191

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

192

'INNERTUBE_CONTEXT': {

193

'client': {

194

'clientName': 'IOS_MUSIC',

195

'clientVersion': '5.21',

196

'deviceModel': 'iPhone14,3',

197

'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

198

},

199

},

200

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

201

'REQUIRE_JS_PLAYER': False

202

},

203

'ios_creator': {

204

'INNERTUBE_CONTEXT': {

205

'client': {

206

'clientName': 'IOS_CREATOR',

207

'clientVersion': '22.33.101',

208

'deviceModel': 'iPhone14,3',

209

'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'

210

},

211

},

212

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

213

'REQUIRE_JS_PLAYER': False

214

},

215

# mweb has 'ultralow' formats

216

# See: https://github.com/yt-dlp/yt-dlp/pull/557

217

'mweb': {

218

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

219

'INNERTUBE_CONTEXT': {

220

'client': {

221

'clientName': 'MWEB',

222

'clientVersion': '2.20220801.00.00',

223

}

224

},

225

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

226

},

227

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

228

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

229

'tv_embedded': {

230

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

231

'INNERTUBE_CONTEXT': {

232

'client': {

233

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

234

'clientVersion': '2.0',

235

},

236

},

237

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):
    """Split an innertube client name into ``(name, base, variant)``.

    Two spellings are handled:

    * ``'<variant>.<base>'`` -- split on the last ``.``; the result is
      ``(variant, base, variant)``.
    * ``'<base>_<variant>'`` -- split on the first ``_``; the result is
      ``(client_name, base, variant)``. Without any ``_`` the variant
      is ``None``.
    """
    dotted_variant, dot, dotted_base = client_name.rpartition('.')
    if dot:
        return dotted_variant, dotted_base, dotted_variant

    base, underscore, variant = client_name.partition('_')
    return client_name, base, variant if underscore else None

248

249

250

def build_innertube_clients():
    """Fill in defaults and priorities for every INNERTUBE_CLIENTS entry.

    For each configured client this sets default values for the API key,
    host, ``REQUIRE_JS_PLAYER`` and the context language, then stores a
    ``priority`` computed from the client's base name and lowered for
    variants. Every client without a variant additionally gets a derived
    ``'<client>_embedscreen'`` entry. The module-level dict is modified
    in place; nothing is returned.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    # NOTE(review): `qualities` is a project helper; presumably it ranks a
    # name by its position in the given sequence -- confirm in ..utils.
    priority = qualities(base_clients[::-1])

    # Iterate over a snapshot because '<client>_embedscreen' entries are
    # inserted into INNERTUBE_CLIENTS inside the loop.
    for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
        ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(client)
        ytcfg['priority'] = 10 * priority(base_client)

        if not variant:
            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            # Give every config its own dict so that a later in-place edit of
            # one client's 'thirdParty' cannot leak into the others.
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = dict(third_party)
            embedscreen['priority'] -= 3
        elif variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = dict(third_party)
            ytcfg['priority'] -= 2
        else:
            ytcfg['priority'] -= 3

276

277

278

build_innertube_clients()

279

280

281

class BadgeType(enum.Enum):
    """Badge categories; member values are assigned by ``enum.auto()``."""

    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()
    LIVE_NOW = enum.auto()

288

289

290

class YoutubeBaseInfoExtractor(InfoExtractor):

291

"""Provide base functions for Youtube extractors"""

292

293

# Regex alternation of reserved path names. The alternatives are separated by
# a bare `|`.
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
    r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
    r'browse|oembed|get_video_info|iframe_api|s/player|source|'
    r'storefront|oops|index|account|t/terms|about|upload|signin|logout')

# Regex for a playlist ID: a known prefix followed by at least ten ID
# characters, or one of the fixed IDs listed at the end.
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

300

301

# _NETRC_MACHINE = 'youtube'

302

303

# If True it will raise an error if no login info is provided

304

_LOGIN_REQUIRED = False

305

306

_INVIDIOUS_SITES = (

307

# invidious-redirect websites

308

r'(?:www\.)?redirect\.invidious\.io',

309

r'(?:(?:www|dev)\.)?invidio\.us',

310

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

311

r'(?:www\.)?invidious\.pussthecat\.org',

312

r'(?:www\.)?invidious\.zee\.li',

313

r'(?:www\.)?invidious\.ethibox\.fr',

314

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

315

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

316

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

317

# youtube-dl invidious instances list

318

r'(?:(?:www|no)\.)?invidiou\.sh',

319

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

320

r'(?:www\.)?invidious\.kabi\.tk',

321

r'(?:www\.)?invidious\.mastodon\.host',

322

r'(?:www\.)?invidious\.zapashcanon\.fr',

323

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

324

r'(?:www\.)?invidious\.tinfoil-hat\.net',

325

r'(?:www\.)?invidious\.himiko\.cloud',

326

r'(?:www\.)?invidious\.reallyancient\.tech',

327

r'(?:www\.)?invidious\.tube',

328

r'(?:www\.)?invidiou\.site',

329

r'(?:www\.)?invidious\.site',

330

r'(?:www\.)?invidious\.xyz',

331

r'(?:www\.)?invidious\.nixnet\.xyz',

332

r'(?:www\.)?invidious\.048596\.xyz',

333

r'(?:www\.)?invidious\.drycat\.fr',

334

r'(?:www\.)?inv\.skyn3t\.in',

335

r'(?:www\.)?tube\.poal\.co',

336

r'(?:www\.)?tube\.connect\.cafe',

337

r'(?:www\.)?vid\.wxzm\.sx',

338

r'(?:www\.)?vid\.mint\.lgbt',

339

r'(?:www\.)?vid\.puffyan\.us',

340

r'(?:www\.)?yewtu\.be',

341

r'(?:www\.)?yt\.elukerio\.org',

342

r'(?:www\.)?yt\.lelux\.fi',

343

r'(?:www\.)?invidious\.ggc-project\.de',

344

r'(?:www\.)?yt\.maisputain\.ovh',

345

r'(?:www\.)?ytprivate\.com',

346

r'(?:www\.)?invidious\.13ad\.de',

347

r'(?:www\.)?invidious\.toot\.koeln',

348

r'(?:www\.)?invidious\.fdn\.fr',

349

r'(?:www\.)?watch\.nettohikari\.com',

350

r'(?:www\.)?invidious\.namazso\.eu',

351

r'(?:www\.)?invidious\.silkky\.cloud',

352

r'(?:www\.)?invidious\.exonip\.de',

353

r'(?:www\.)?invidious\.riverside\.rocks',

354

r'(?:www\.)?invidious\.blamefran\.net',

355

r'(?:www\.)?invidious\.moomoo\.de',

356

r'(?:www\.)?ytb\.trom\.tf',

357

r'(?:www\.)?yt\.cyberhost\.uk',

358

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

359

r'(?:www\.)?qklhadlycap4cnod\.onion',

360

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

361

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

362

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

363

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

364

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

365

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

366

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

367

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

368

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

369

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

370

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

371

r'(?:www\.)?piped\.kavin\.rocks',

372

r'(?:www\.)?piped\.tokhmi\.xyz',

373

r'(?:www\.)?piped\.syncpundit\.io',

374

r'(?:www\.)?piped\.mha\.fi',

375

r'(?:www\.)?watch\.whatever\.social',

376

r'(?:www\.)?piped\.garudalinux\.org',

377

r'(?:www\.)?piped\.rivo\.lol',

378

r'(?:www\.)?piped-libre\.kavin\.rocks',

379

r'(?:www\.)?yt\.jae\.fi',

380

r'(?:www\.)?piped\.mint\.lgbt',

381

r'(?:www\.)?il\.ax',

382

r'(?:www\.)?piped\.esmailelbob\.xyz',

383

r'(?:www\.)?piped\.projectsegfau\.lt',

384

r'(?:www\.)?piped\.privacydev\.net',

385

r'(?:www\.)?piped\.palveluntarjoaja\.eu',

386

r'(?:www\.)?piped\.smnz\.de',

387

r'(?:www\.)?piped\.adminforge\.de',

388

r'(?:www\.)?watch\.whatevertinfoil\.de',

389

r'(?:www\.)?piped\.qdi\.fi',

390

)

391

392

# extracted from account/account_menu ep

393

# XXX: These are the supported YouTube UI and API languages,

394

# which is slightly different from languages supported for translation in YouTube studio

395

_SUPPORTED_LANG_CODES = [

396

'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',

397

'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',

398

'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',

399

'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',

400

'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',

401

'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'

402

]

403

404

_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}

405

406

@functools.cached_property

407

def _preferred_lang(self):

408

"""

409

Returns a language code supported by YouTube for the user preferred language.

410

Returns None if no preferred language set.

411

"""

412

preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]

413

if not preferred_lang:

414

return

415

if preferred_lang not in self._SUPPORTED_LANG_CODES:

416

raise ExtractorError(

417

f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',

418

expected=True)

419

elif preferred_lang != 'en':

420

self.report_warning(

421

f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')

422

return preferred_lang

423

424

def _initialize_consent(self):

425

cookies = self._get_cookies('https://www.youtube.com/')

426

if cookies.get('__Secure-3PSID'):

427

return

428

consent_id = None

429

consent = cookies.get('CONSENT')

430

if consent:

431

if 'YES' in consent.value:

432

return

433

consent_id = self._search_regex(

434

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

435

if not consent_id:

436

consent_id = random.randint(100, 999)

437

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

438

439

def _initialize_pref(self):

440

cookies = self._get_cookies('https://www.youtube.com/')

441

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(urllib.parse.parse_qsl(pref_cookie.value))

446

except ValueError:

447

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

448

pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})

449

self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))

450

451

def _real_initialize(self):

452

self._initialize_pref()

453

self._initialize_consent()

454

self._check_login_required()

455

456

def _check_login_required(self):

457

if self._LOGIN_REQUIRED and not self._cookies_passed:

458

self.raise_login_required('Login details are needed to download this content', method='cookies')

459

460

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

461

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

462

463

def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for `client`, so callers may mutate it."""
    client_defaults = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(client_defaults)

465

466

def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the INNERTUBE_CLIENTS entry for `client`."""
    client_defaults = INNERTUBE_CLIENTS[client]
    return client_defaults['INNERTUBE_HOST']

468

469

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get on `ytcfg`, falling back to the default ytcfg of `default_client`
    when the first lookup yields a falsy value.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

473

474

def _extract_client_name(self, ytcfg, default_client='web'):

475

return self._ytcfg_get_safe(

476

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

477

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)

478

479

def _extract_client_version(self, ytcfg, default_client='web'):

480

return self._ytcfg_get_safe(

481

ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],

482

lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)

483

484

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname by priority: the 'innertube_host' extractor
    argument, then `req_api_hostname`, then the default host of
    `default_client` ('web' when not given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

487

488

def _extract_api_key(self, ytcfg=None, default_client='web'):

489

return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)

490

491

def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the first 'INNERTUBE_CONTEXT' dict found in `ytcfg` or in the
    default ytcfg of `default_client`, with language and timezone forced.

    The context's 'client' dict is updated in place. When the context has no
    'client' dict, the forced values land in a throwaway dict and the returned
    context is left untouched.
    """
    defaults = self._get_default_ytcfg(default_client)
    context = get_first((ytcfg, defaults), 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context['hl'] = self._preferred_lang or 'en'
    client_context['timeZone'] = 'UTC'
    client_context['utcOffsetMinutes'] = 0
    return context

# Cache used by _generate_sapisidhash_header: None = cookies not inspected yet,
# False = inspected and no usable cookie found, otherwise the cookie value.
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

502

time_now = round(time.time())

503

if self._SAPISID is None:

504

yt_cookies = self._get_cookies('https://www.youtube.com')

505

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

506

# See: https://github.com/yt-dlp/yt-dlp/issues/393

507

sapisid_cookie = dict_get(

508

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

509

if sapisid_cookie and sapisid_cookie.value:

510

self._SAPISID = sapisid_cookie.value

511

self.write_debug('Extracted SAPISID cookie')

512

# SAPISID cookie is required if not already present

513

if not yt_cookies.get('SAPISID'):

514

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

515

self._set_cookie(

516

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

517

else:

518

self._SAPISID = False

519

if not self._SAPISID:

520

return None

521

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

522

sapisidhash = hashlib.sha1(

523

f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()

524

return f'SAPISIDHASH {time_now}_{sapisidhash}'

525

526

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` as JSON to the `/youtubei/v1/<ep>` endpoint and return the parsed response.

    The request body is `{'context': ...}` merged with `query`; a falsy
    `context` is replaced by the default context of `default_client`.
    Caller-supplied `headers` override the generated API headers.
    The API key is taken from the 'innertube_key' extractor argument,
    then `api_key`, then the default key of `default_client`.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    return self._download_json(
        f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})

543

544

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON that follows _YT_INITIAL_DATA_RE and return it parsed."""
    initial_data = self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
    return initial_data

546

547

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given ytcfg dicts that
    converts to an int, or None when there is none.
    """
    indices = (int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX'])) for cfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the 'ID_TOKEN' string from `ytcfg`, else search `webpage` for it; None when not found."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

568

569

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg

    Returns the first non-empty 'DELEGATED_SESSION_ID', or the channel part
    of a datasync id, found among `args`; None when nothing matches.
    """
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated

        datasync_id = try_get(
            source, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                     lambda x: x['DATASYNC_ID']), str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) > 1 and parts[1]:
            return parts[0]
    return None

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_paths, expected_type=str)

597

598

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated (result is cached)."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

601

602

def extract_ytcfg(self, video_id, webpage):
    """
    Extract the object passed to `ytcfg.set({...});` in `webpage`.

    Returns the parsed dict, or {} when `webpage` is empty, the call is not
    found, or its argument cannot be parsed as JSON.
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped literals. The previous
    # pattern had `$` in their place, which can never match in the middle of
    # the page, so the config was never found.
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_config, video_id, fatal=False) or {}

609

610

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an API request.

    Explicit keyword arguments take precedence over values extracted from
    `ytcfg`. 'Authorization' and 'X-Origin' are added only when a SAPISIDHASH
    can be generated. The result is passed through filter_dict before being
    returned.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # An account sync id without a known session index uses index 0
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth_header = self._generate_sapisidhash_header(origin)
    if auth_header is not None:
        headers['Authorization'] = auth_header
        headers['X-Origin'] = origin
    return filter_dict(headers)

635

636

def _download_ytcfg(self, client, video_id):

637

url = {

638

'web': 'https://www.youtube.com',

639

'web_music': 'https://music.youtube.com',

640

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(

645

url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')

646

return self.extract_ytcfg(video_id, webpage) or {}

647

648

@staticmethod

649

def _build_api_continuation_query(continuation, ctp=None):

650

query = {

651

'continuation': continuation

652

}

653

# TODO: Inconsistency with clickTrackingParams.

654

# Currently we have a fixed ctp contained within context (from ytcfg)

655

# and a ctp in root query for continuation.

656

if ctp:

657

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's 'nextContinuationData'
    or 'reloadContinuationData'; None when neither holds a continuation token.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

672

673

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or carries no
    'continuationCommand' token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

682

683

@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`.

    The next/reload continuation data is preferred; otherwise the first
    'continuationItemRenderer' endpoint under 'contents', 'items' or 'rows'
    that yields a query is used.
    """
    item_renderer_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, item_renderer_path, get_all=False, expected_type=cls._extract_continuation_ep_data)

693

694

@classmethod
def _extract_alerts(cls, data):
    """
    Yield `(alert_type, message)` for every alert in `data['alerts']` that
    has both a type and a non-empty text. Non-dict entries are skipped.
    """
    alert_groups = try_get(data, lambda x: x['alerts'], list) or []
    for group in alert_groups:
        if not isinstance(group, dict):
            continue
        for alert in group.values():
            alert_type = alert.get('type')
            message = cls._get_text(alert, 'text') if alert_type else None
            if message:
                yield alert_type, message

706

707

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

708

errors, warnings = [], []

709

for alert_type, alert_message in alerts:

710

if alert_type.lower() == 'error' and fatal:

711

errors.append([alert_type, alert_message])

712

elif alert_message not in self._IGNORED_WARNINGS:

713

warnings.append([alert_type, alert_message])

714

715

for alert_type, alert_message in (warnings + errors[:-1]):

716

self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

717

if errors:

718

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

719

720

def _extract_and_report_alerts(self, data, *args, **kwargs):

721

return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

722

723

def _extract_badges(self, renderer: dict):
    """
    Return a list of `{'type': BadgeType}` dicts for the
    'metadataBadgeRenderer' badges of `renderer`.

    Each badge is classified by its icon type, then by its style, and finally
    by keywords in its (language-dependent) label. At most one entry is
    produced per badge; badges that match nothing are skipped.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Use the type matched by the label: `badge_type` is always
                # falsy on this path. Stop at the first matching keyword.
                badges.append({'type': label_badge_type})
                break

    return badges

@staticmethod
def _has_badge(badges, badge_type):
    """True when any entry of `badges` has the given 'type'."""
    matching = traverse_obj(badges, lambda _, v: v['type'] == badge_type)
    return bool(matching)

766

767

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths of `data`.

    With no paths, `data` itself is examined. For each candidate object its
    'simpleText' is preferred; otherwise the 'text' of its 'runs' (or of the
    candidate itself when it is a list) are joined, limited to the first
    `max_runs` runs when given. Returns None when nothing yields text.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # A path without branching yields a single object, not a list of results
            branching = any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None

def _get_count(self, data, *path_list):
    """
    Parse a count from the text found at the given paths of `data`.

    parse_count is tried first; when it yields None, the leading run of
    digits and commas (after stripping all whitespace) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed
    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key

    Returns a list of dicts with 'url', 'height' and 'width'; entries
    without a valid URL are skipped.
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            thumb_url = url_or_none(entry.get('url'))
            if not thumb_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumb_url:
                thumb_url = thumb_url.partition('?')[0]
            results.append({
                'url': thumb_url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text contains no relative time, or when the
    extracted amount/unit cannot be converted.
    """
    # NOTE(review): the search below was missing from this copy of the file,
    # leaving `mobj` undefined. The pattern is reconstructed from the groups
    # used here ('start', 'time', 'unit') and the docstring examples.
    # Confirm against upstream.
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if mobj:
        start = mobj.group('start')
        if start:
            return datetime_from_str(start)
        try:
            return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
        except ValueError:
            return None
    return None

def _parse_time_text(self, text):
    """
    Convert a time text into a UNIX timestamp.

    A relative time ('... ago', 'today', ...) is tried first, then
    unified_timestamp on the whole text, then on a date-like part extracted
    from the lowercased text. Returns None for empty text or when nothing
    parses; in that case a warning is reported once if the preferred
    language is unset or 'en'.
    """
    if not text:
        return None

    relative = self.extract_relative_time(text)
    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        timestamp = None

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_part = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    if text and timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp

856

857

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` under self.RetryManager() and return the response.

    An attempt is retried on network errors that are not HTTP errors, on HTTP
    errors with a status other than 403/429, on alerts containing
    'unknown error', and when the response has nothing at `check_get_keys`.
    Any other failure is passed to self._error_or_warning with `fatal`.
    """
    for retry in self.RetryManager():
        try:
            # Always fatal here so that failures reach the retry handling below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                retry.error = e
                continue

            # Peek at the error body; a non-HTML body may hold a JSON error message
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    # fatal=False: only surfaces the message as a warning
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response

@staticmethod

def is_music_url(url):

911

return re.match(r'https?://music\.youtube\.com/', url) is not None

912

913

def _extract_video(self, renderer):
    """
    Build a 'url'-type result dict (handled by YoutubeIE) from a video renderer.

    Several fields fall back to the reel player header nested under the
    renderer's navigation endpoint when the renderer itself lacks them.
    """
    video_id = renderer.get('videoId')

    # Secondary metadata source, consulted for title, channel and time text
    reel_header_renderer = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header_renderer, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length/overlay text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
            video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not channel_id:
        channel_id = traverse_obj(reel_header_renderer, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    # A shorts URL is used when either the overlay style or the navigation URL says so
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    url = f'https://www.youtube.com/watch?v={video_id}'
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header_renderer, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    # Precedence: scheduled start > 'streamed' in the time text > live overlay/badge
    live_status = (
        'is_upcoming' if scheduled_timestamp is not None
        else 'was_live' if 'streamed' in time_text.lower()
        else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
        else None)

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    view_count = (0 if 'no views' in view_count_text.lower()
                  else self._get_count({'simpleText': view_count_text}))
    # Live and upcoming videos report the count under 'concurrent_view_count'
    view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': (self._get_text(renderer, 'ownerText', 'shortBylineText')
                    or self._get_text(reel_header_renderer, 'channelTitleText')),
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        # The timestamp is only derived from the time text when 'approximate_date' is requested
        'timestamp': (self._parse_time_text(time_text)
                      if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
                      else None),
        'release_timestamp': scheduled_timestamp,
        'availability':
            'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            else self._availability(
                is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
                needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
                needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
                is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
        view_count_field: view_count,
        'live_status': live_status
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

998

# Description string for this extractor.
# NOTE(review): presumably shown in extractor listings by the base class - confirm against InfoExtractor.
IE_DESC = 'YouTube'

999

# Verbose-mode pattern recognising every supported spelling of a YouTube video
# URL, as well as a bare 11-character video ID. The video ID is captured in the
# named group ``id``. Group 1 wraps the whole (optional) URL prefix so that the
# conditional ``(?(1).+)?`` only tolerates trailing text when a prefix matched.
# ``%(invidious)s`` is filled with the alternation of known Invidious hosts.
# Whitespace and ``#`` comments inside the string are ignored because of (?x).
_VALID_URL = r"""(?x)^
    (
        (?:https?://|//)                                     # http(s):// or protocol-independent URL
        (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
           (?:www\.)?deturl\.com/www\.youtube\.com|
           (?:www\.)?pwnyoutube\.com|
           (?:www\.)?hooktube\.com|
           (?:www\.)?yourepeat\.com|
           tube\.majestyc\.net|
           %(invidious)s|
           youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
        (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
        (?:                                                  # the various things that can precede the ID:
            (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream))  # v/ or embed/ or e/ or shorts/
            |(?:                                             # or the v= param in all its forms
                (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
                v=
            )
        ))
        |(?:
           youtu\.be|                                        # just youtu.be/xxxx
           vid\.plus|                                        # or vid.plus/xxxx
           zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
           %(invidious)s
        )/
        |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
        )
    )?                                                       # all until now is optional -> you can pass the naked ID
    (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
    (?(1).+)?                                                # if we found the ID, everything can follow
    (?:\#|$)""" % {
    'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}

# Patterns used to locate YouTube players embedded in third-party HTML.
# Every pattern exposes the embedded URL through the named group ``url``.
_EMBED_REGEX = [
    # Quoted /embed/, /v/ or /p/ URLs appearing as the src/data attribute of
    # <iframe>/<embed>/<object>, as a data-video-url attribute, or as the first
    # argument of an SWFObject call. The ``embedSWF`` branch matches the opening
    # parenthesis followed by optional whitespace; it used to read
    # ``embedSWF\(?:\s*`` (a mistyped group) which demanded a literal ':' and
    # therefore never matched a real ``embedSWF("...")`` call.
    r'''(?x)
        (?:
            <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''',
    # https://wordpress.org/plugins/lazy-load-for-videos/
    r'''(?xs)
        <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
        \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
]

1053

# Declared result kind for this extractor: a single video.
# NOTE(review): presumably consumed by the InfoExtractor base class / test harness - confirm.
_RETURN_TYPE = 'video' # While there are "multifeed" test cases, they don't seem to actually exist anymore

1054

1055

# Patterns that pull the player identifier (named group ``id``) out of a
# player JavaScript URL. Listed from the most specific URL layout to the
# loosest fallback.
_PLAYER_INFO_RE = (
    # .../s/player/<id>/player...
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    # .../<id>/player_ias.vflset[/xx_YY]/base.js or .../<id>/player-plasma-ias-(phone|tablet)-xx_YY.vflset/base.js
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    # any .js URL carrying a "vfl..." token
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)

1060

# Static metadata for known format codes, keyed by the itag as a string
# (plus the special '_rtmp' key). Each value lists whatever is fixed for that
# itag: container extension, dimensions, codecs, audio bitrate, fps, a
# human-readable note and, for some families, a negative 'preference'.
# Fields that vary between videos are deliberately left out (see inline notes).
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # Dash mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # Dash webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # Dash webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # Dash webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
    '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
    '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
    '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
    '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}

1169

# Subtitle format identifiers known to this extractor.
# NOTE(review): presumably the formats requested for each caption track - confirm at the use site.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# Geo-bypass flag, switched off for this extractor.
# NOTE(review): presumably read by the InfoExtractor base class - confirm.
_GEO_BYPASS = False

# Short name identifying this extractor.
IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1181

'uploader': 'Philipp Hagemeister',

1182

'uploader_id': 'phihag',

1183

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1184

'channel': 'Philipp Hagemeister',

1185

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1186

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1187

'upload_date': '20121002',

1188

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1189

'categories': ['Science & Technology'],

1190

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1195

'playable_in_embed': True,

1196

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1197

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1202

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1207

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1212

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1213

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1214

'uploader': 'SET India',

1215

'uploader_id': 'setindia',

1216

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1217

'age_limit': 18,

1218

},

1219

'skip': 'Private video',

1220

},

1221

{

1222

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1223

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1228

'uploader': 'Philipp Hagemeister',

1229

'uploader_id': 'phihag',

1230

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1231

'channel': 'Philipp Hagemeister',

1232

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1233

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1234

'upload_date': '20121002',

1235

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1236

'categories': ['Science & Technology'],

1237

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1242

'playable_in_embed': True,

1243

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1244

'live_status': 'not_live',

1245

'age_limit': 0,

1246

'comment_count': int,

1247

'channel_follower_count': int

1248

},

1249

'params': {

1250

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1255

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1260

'uploader_id': '8KVIDEO',

1261

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1262

'description': '',

1263

'uploader': '8KVIDEO',

1264

'title': 'UHDTV TEST 8K VIDEO.mp4'

1265

},

1266

'params': {

1267

'youtube_include_dash_manifest': True,

1268

'format': '141',

1269

},

1270

'skip': 'format 141 not served anymore',

1271

},

1272

# DASH manifest with encrypted signature

1273

{

1274

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1279

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1280

'duration': 244,

1281

'uploader': 'AfrojackVEVO',

1282

'uploader_id': 'AfrojackVEVO',

1283

'upload_date': '20131011',

1284

'abr': 129.495,

1285

'like_count': int,

1286

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1287

'playable_in_embed': True,

1288

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1289

'view_count': int,

1290

'track': 'The Spark',

1291

'live_status': 'not_live',

1292

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1293

'channel': 'Afrojack',

1294

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1295

'tags': 'count:19',

1296

'availability': 'public',

1297

'categories': ['Music'],

1298

'age_limit': 0,

1299

'alt_title': 'The Spark',

1300

'channel_follower_count': int

1301

},

1302

'params': {

1303

'youtube_include_dash_manifest': True,

1304

'format': '141/bestaudio[ext=m4a]',

1305

},

1306

},

1307

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1308

{

1309

'note': 'Embed allowed age-gate video',

1310

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1315

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1316

'duration': 142,

1317

'uploader': 'The Witcher',

1318

'uploader_id': 'WitcherGame',

1319

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1320

'upload_date': '20140605',

1321

'age_limit': 18,

1322

'categories': ['Gaming'],

1323

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1324

'availability': 'needs_auth',

1325

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1326

'like_count': int,

1327

'channel': 'The Witcher',

1328

'live_status': 'not_live',

1329

'tags': 'count:17',

1330

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1331

'playable_in_embed': True,

1332

'view_count': int,

1333

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1338

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1343

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1344

'upload_date': '20200408',

1345

'uploader_id': 'FlyingKitty900',

1346

'uploader': 'FlyingKitty',

1347

'age_limit': 18,

1348

'availability': 'needs_auth',

1349

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1350

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1351

'channel': 'FlyingKitty',

1352

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1353

'view_count': int,

1354

'categories': ['Entertainment'],

1355

'live_status': 'not_live',

1356

'tags': ['Flyingkitty', 'godzilla 2'],

1357

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1358

'like_count': int,

1359

'duration': 177,

1360

'playable_in_embed': True,

1361

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1366

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1367

'info_dict': {

1368

'id': 'Tq92D6wQ1mg',

1369

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1370

'ext': 'mp4',

1371

'upload_date': '20191228',

1372

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1373

'uploader': 'Projekt Melody',

1374

'description': 'md5:17eccca93a786d51bc67646756894066',

1375

'age_limit': 18,

1376

'like_count': int,

1377

'availability': 'needs_auth',

1378

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1379

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1380

'view_count': int,

1381

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1382

'channel': 'Projekt Melody',

1383

'live_status': 'not_live',

1384

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1385

'playable_in_embed': True,

1386

'categories': ['Entertainment'],

1387

'duration': 106,

1388

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1389

'comment_count': int,

1390

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1395

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1400

'uploader': 'Herr Lurik',

1401

'uploader_id': 'st3in234',

1402

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1403

'upload_date': '20130730',

1404

'track': 'Such mich find mich',

1405

'age_limit': 0,

1406

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1407

'like_count': int,

1408

'playable_in_embed': False,

1409

'creator': 'OOMPH!',

1410

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1411

'view_count': int,

1412

'alt_title': 'Such mich find mich',

1413

'duration': 210,

1414

'channel': 'Herr Lurik',

1415

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1416

'categories': ['Music'],

1417

'availability': 'public',

1418

'uploader_url': 'http://www.youtube.com/user/st3in234',

1419

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1420

'live_status': 'not_live',

1421

'artist': 'OOMPH!',

1422

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1427

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1428

'only_matching': True,

1429

},

1430

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1431

# YouTube Red ad is not captured for creator

1432

{

1433

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1439

'uploader_id': 'deadmau5',

1440

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1441

'creator': 'deadmau5',

1442

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1443

'uploader': 'deadmau5',

1444

'title': 'Deadmau5 - Some Chords (HD)',

1445

'alt_title': 'Some Chords',

1446

'availability': 'public',

1447

'tags': 'count:14',

1448

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1449

'view_count': int,

1450

'live_status': 'not_live',

1451

'channel': 'deadmau5',

1452

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1453

'like_count': int,

1454

'track': 'Some Chords',

1455

'artist': 'deadmau5',

1456

'playable_in_embed': True,

1457

'age_limit': 0,

1458

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1459

'categories': ['Music'],

1460

'album': 'Some Chords',

1461

'channel_follower_count': int

1462

},

1463

'expected_warnings': [

1464

'DASH manifest missing',

1465

]

1466

},

1467

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1468

{

1469

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1475

'uploader_id': 'olympic',

1476

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1477

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1478

'uploader': 'Olympics',

1479

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1480

'like_count': int,

1481

'release_timestamp': 1343767800,

1482

'playable_in_embed': True,

1483

'categories': ['Sports'],

1484

'release_date': '20120731',

1485

'channel': 'Olympics',

1486

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1487

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1488

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1489

'age_limit': 0,

1490

'availability': 'public',

1491

'live_status': 'was_live',

1492

'view_count': int,

1493

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1494

'channel_follower_count': int

1495

},

1496

'params': {

1497

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1507

'duration': 85,

1508

'upload_date': '20110310',

1509

'uploader_id': 'AllenMeow',

1510

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1511

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1512

'uploader': '孫ᄋᄅ',

1513

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1514

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1519

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1520

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1521

'view_count': int,

1522

'categories': ['People & Blogs'],

1523

'like_count': int,

1524

'live_status': 'not_live',

1525

'availability': 'unlisted',

1526

'comment_count': int,

1527

'channel_follower_count': int

1528

},

1529

},

1530

# url_encoded_fmt_stream_map is empty string

1531

{

1532

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1537

'description': '',

1538

'upload_date': '20150404',

1539

'uploader_id': 'spbelect',

1540

'uploader': 'Наблюдатели Петербурга',

1541

},

1542

'params': {

1543

'skip_download': 'requires avconv',

1544

},

1545

'skip': 'This live event has ended.',

1546

},

1547

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1548

{

1549

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1554

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1555

'duration': 220,

1556

'upload_date': '20150625',

1557

'uploader_id': 'dorappi2000',

1558

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1559

'uploader': 'dorappi2000',

1560

'formats': 'mincount:31',

1561

},

1562

'skip': 'not actual anymore',

1563

},

1564

# DASH manifest with segment_list

1565

{

1566

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1567

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1572

'uploader': 'Airtek',

1573

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1574

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1575

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1576

},

1577

'params': {

1578

'youtube_include_dash_manifest': True,

1579

'format': '135', # bestvideo

1580

},

1581

'skip': 'This live event has ended.',

1582

},

1583

{

1584

# Multifeed videos (multiple cameras), URL is for Main Camera

1585

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1586

'info_dict': {

1587

'id': 'jvGDaLqkpTg',

1588

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1589

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1596

'description': 'md5:e03b909557865076822aa169218d6a5d',

1597

'duration': 10643,

1598

'upload_date': '20161111',

1599

'uploader': 'Team PGP',

1600

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1601

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1608

'description': 'md5:e03b909557865076822aa169218d6a5d',

1609

'duration': 10991,

1610

'upload_date': '20161111',

1611

'uploader': 'Team PGP',

1612

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1613

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1620

'description': 'md5:e03b909557865076822aa169218d6a5d',

1621

'duration': 10995,

1622

'upload_date': '20161111',

1623

'uploader': 'Team PGP',

1624

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1625

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1632

'description': 'md5:e03b909557865076822aa169218d6a5d',

1633

'duration': 10990,

1634

'upload_date': '20161111',

1635

'uploader': 'Team PGP',

1636

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1637

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1642

},

1643

'skip': 'Not multifeed anymore',

1644

},

1645

{

1646

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1647

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1648

'info_dict': {

1649

'id': 'gVfLd0zydlo',

1650

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1651

},

1652

'playlist_count': 2,

1653

'skip': 'Not multifeed anymore',

1654

},

1655

{

1656

'url': 'https://vid.plus/FlRa-iH7PGw',

1657

'only_matching': True,

1658

},

1659

{

1660

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1661

'only_matching': True,

1662

},

1663

{

1664

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1665

# Also tests cut-off URL expansion in video description (see

1666

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1667

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1668

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1673

'alt_title': 'Dark Walk',

1674

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1675

'duration': 133,

1676

'upload_date': '20151119',

1677

'uploader_id': 'IronSoulElf',

1678

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1679

'uploader': 'IronSoulElf',

1680

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1681

'track': 'Dark Walk',

1682

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1683

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1684

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1685

'categories': ['Film & Animation'],

1686

'view_count': int,

1687

'live_status': 'not_live',

1688

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1689

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1690

'tags': 'count:13',

1691

'availability': 'public',

1692

'channel': 'IronSoulElf',

1693

'playable_in_embed': True,

1694

'like_count': int,

1695

'age_limit': 0,

1696

'channel_follower_count': int

1697

},

1698

'params': {

1699

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1704

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1705

'only_matching': True,

1706

},

1707

{

1708

# Video with yt:stretch=17:0

1709

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1714

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1715

'upload_date': '20151107',

1716

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1717

'uploader': 'CH GAMER DROID',

1718

},

1719

'params': {

1720

'skip_download': True,

1721

},

1722

'skip': 'This video does not exist.',

1723

},

1724

{

1725

# Video with incomplete 'yt:stretch=16:'

1726

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1727

'only_matching': True,

1728

},

1729

{

1730

# Video licensed under Creative Commons

1731

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1736

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1737

'duration': 721,

1738

'upload_date': '20150128',

1739

'uploader_id': 'BerkmanCenter',

1740

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1741

'uploader': 'The Berkman Klein Center for Internet & Society',

1742

'license': 'Creative Commons Attribution license (reuse allowed)',

1743

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1744

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1745

'like_count': int,

1746

'age_limit': 0,

1747

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1748

'channel': 'The Berkman Klein Center for Internet & Society',

1749

'availability': 'public',

1750

'view_count': int,

1751

'categories': ['Education'],

1752

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1753

'live_status': 'not_live',

1754

'playable_in_embed': True,

1755

'comment_count': int,

1756

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

# Channel-like uploader_url

1765

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1770

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1771

'duration': 4060,

1772

'upload_date': '20151120',

1773

'uploader': 'Bernie Sanders',

1774

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1775

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1776

'license': 'Creative Commons Attribution license (reuse allowed)',

1777

'playable_in_embed': True,

1778

'tags': 'count:12',

1779

'like_count': int,

1780

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1781

'age_limit': 0,

1782

'availability': 'public',

1783

'categories': ['News & Politics'],

1784

'channel': 'Bernie Sanders',

1785

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1786

'view_count': int,

1787

'live_status': 'not_live',

1788

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1789

'comment_count': int,

1790

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1799

'only_matching': True,

1800

},

1801

{

1802

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1803

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1804

'only_matching': True,

1805

},

1806

{

1807

# Rental video preview

1808

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1813

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1814

'upload_date': '20150811',

1815

'uploader': 'FlixMatrix',

1816

'uploader_id': 'FlixMatrixKaravan',

1817

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1818

'license': 'Standard YouTube License',

1819

},

1820

'params': {

1821

'skip_download': True,

1822

},

1823

'skip': 'This video is not available.',

1824

},

1825

{

1826

# YouTube Red video with episode data

1827

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1832

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1833

'duration': 2085,

1834

'upload_date': '20170118',

1835

'uploader': 'Vsauce',

1836

'uploader_id': 'Vsauce',

1837

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1838

'series': 'Mind Field',

1839

'season_number': 1,

1840

'episode_number': 1,

1841

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1842

'tags': 'count:12',

1843

'view_count': int,

1844

'availability': 'public',

1845

'age_limit': 0,

1846

'channel': 'Vsauce',

1847

'episode': 'Episode 1',

1848

'categories': ['Entertainment'],

1849

'season': 'Season 1',

1850

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1851

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1852

'like_count': int,

1853

'playable_in_embed': True,

1854

'live_status': 'not_live',

1855

'channel_follower_count': int

1856

},

1857

'params': {

1858

'skip_download': True,

1859

},

1860

'expected_warnings': [

1861

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1866

# as inappropriate or offensive to some audiences.

1867

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1872

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1873

'duration': 965,

1874

'upload_date': '20140124',

1875

'uploader': 'New Century Foundation',

1876

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1877

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1878

},

1879

'params': {

1880

'skip_download': True,

1881

},

1882

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1887

'only_matching': True,

1888

},

1889

{

1890

# geo restricted to JP

1891

'url': 'sJL6WA-aGkQ',

1892

'only_matching': True,

1893

},

1894

{

1895

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1896

'only_matching': True,

1897

},

1898

{

1899

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1900

'only_matching': True,

1901

},

1902

{

1903

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1904

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1905

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1910

'only_matching': True,

1911

},

1912

{

1913

# Video with unsupported adaptive stream type formats

1914

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1919

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1920

'duration': 433,

1921

'upload_date': '20130923',

1922

'uploader': 'Amelia Putri Harwita',

1923

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1924

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1925

'formats': 'maxcount:10',

1926

},

1927

'params': {

1928

'skip_download': True,

1929

'youtube_include_dash_manifest': False,

1930

},

1931

'skip': 'not actual anymore',

1932

},

1933

{

1934

# YouTube Music auto-generated description

1935

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1940

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1941

'upload_date': '20190312',

1942

'uploader': 'Stephen - Topic',

1943

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1944

'artist': 'Stephen',

1945

'track': 'Voyeur Girl',

1946

'album': 'it\'s too much love to know my dear',

1947

'release_date': '20190313',

1948

'release_year': 2019,

1949

'alt_title': 'Voyeur Girl',

1950

'view_count': int,

1951

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1952

'playable_in_embed': True,

1953

'like_count': int,

1954

'categories': ['Music'],

1955

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1956

'channel': 'Stephen',

1957

'availability': 'public',

1958

'creator': 'Stephen',

1959

'duration': 169,

1960

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1961

'age_limit': 0,

1962

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1963

'tags': 'count:11',

1964

'live_status': 'not_live',

1965

'channel_follower_count': int

1966

},

1967

'params': {

1968

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1973

'only_matching': True,

1974

},

1975

{

1976

# invalid -> valid video id redirection

1977

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1982

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1983

'upload_date': '20090125',

1984

'uploader': 'Prochorowka',

1985

'uploader_id': 'Prochorowka',

1986

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1987

'artist': 'Panjabi MC',

1988

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1989

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1990

},

1991

'params': {

1992

'skip_download': True,

1993

},

1994

'skip': 'Video unavailable',

1995

},

1996

{

1997

# empty description results in an empty string

1998

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

2005

'uploader_id': 'ElevageOrVert',

2006

'uploader': 'ElevageOrVert',

2007

'view_count': int,

2008

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

2009

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

2010

'like_count': int,

2011

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

2012

'tags': [],

2013

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

2014

'availability': 'public',

2015

'age_limit': 0,

2016

'categories': ['Pets & Animals'],

2017

'duration': 7,

2018

'playable_in_embed': True,

2019

'live_status': 'not_live',

2020

'channel': 'ElevageOrVert',

2021

'channel_follower_count': int

2022

},

2023

'params': {

2024

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

2029

# see [2] for an example with '};' inside ytInitialPlayerResponse

2030

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

2031

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

2032

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

2037

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

2038

'upload_date': '20130831',

2039

'uploader_id': 'kudvenkat',

2040

'uploader': 'kudvenkat',

2041

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

2042

'like_count': int,

2043

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

2044

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

2045

'live_status': 'not_live',

2046

'categories': ['Education'],

2047

'availability': 'public',

2048

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

2049

'tags': 'count:12',

2050

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

2055

'comment_count': int,

2056

'channel_follower_count': int,

'chapters': list,

},

'params': {

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

2065

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

2066

'only_matching': True,

2067

},

2068

{

2069

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

2070

'only_matching': True,

2071

},

2072

{

2073

# https://github.com/ytdl-org/youtube-dl/pull/28094

2074

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

2080

'upload_date': '20141120',

2081

'uploader': 'The Cinematic Orchestra - Topic',

2082

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2083

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2084

'artist': 'The Cinematic Orchestra',

2085

'track': 'Burn Out',

2086

'album': 'Every Day',

2087

'like_count': int,

2088

'live_status': 'not_live',

2089

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

2094

'creator': 'The Cinematic Orchestra',

2095

'channel': 'The Cinematic Orchestra',

2096

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

2097

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

2098

'availability': 'public',

2099

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

2100

'categories': ['Music'],

2101

'playable_in_embed': True,

2102

'channel_follower_count': int

2103

},

2104

'params': {

2105

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

2110

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

2111

'only_matching': True,

2112

},

2113

{

2114

# controversial video, requires bpctr/contentCheckOk

2115

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

2120

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

2121

'uploader': 'CBS Mornings',

2122

'uploader_id': 'CBSThisMorning',

2123

'upload_date': '20140716',

2124

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

2125

'duration': 170,

2126

'categories': ['News & Politics'],

2127

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

2128

'view_count': int,

2129

'channel': 'CBS Mornings',

2130

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2131

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2132

'age_limit': 18,

2133

'availability': 'needs_auth',

2134

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2135

'like_count': int,

2136

'live_status': 'not_live',

2137

'playable_in_embed': True,

2138

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2143

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2148

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2149

'upload_date': '20201120',

2150

'uploader': 'Walk around Japan',

2151

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2152

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2153

'duration': 1456,

2154

'categories': ['Travel & Events'],

2155

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2156

'view_count': int,

2157

'channel': 'Walk around Japan',

2158

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2159

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2160

'age_limit': 0,

2161

'availability': 'public',

2162

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2163

'live_status': 'not_live',

2164

'playable_in_embed': True,

2165

'channel_follower_count': int

2166

},

2167

'params': {

2168

'skip_download': True,

2169

},

2170

}, {

2171

# Has multiple audio streams

2172

'url': 'WaOKSUlf4TM',

2173

'only_matching': True

2174

}, {

2175

# Requires Premium: has format 141 when requested using YTM url

2176

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2177

'only_matching': True

2178

}, {

2179

# multiple subtitles with same lang_code

2180

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2181

'only_matching': True,

2182

}, {

2183

# Force use android client fallback

2184

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2185

'info_dict': {

2186

'id': 'YOelRv7fMxY',

2187

'title': 'DIGGING A SECRET TUNNEL Part 1',

2188

'ext': '3gp',

2189

'upload_date': '20210624',

2190

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2191

'uploader': 'colinfurze',

2192

'uploader_id': 'colinfurze',

2193

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2194

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2195

'duration': 596,

2196

'categories': ['Entertainment'],

2197

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2198

'view_count': int,

2199

'channel': 'colinfurze',

2200

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2201

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2202

'age_limit': 0,

2203

'availability': 'public',

2204

'like_count': int,

2205

'live_status': 'not_live',

2206

'playable_in_embed': True,

2207

'channel_follower_count': int,

'chapters': list,

},

'params': {

'format': '17', # 3gp format available on android

2212

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2217

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2218

'only_matching': True,

2219

'params': {

2220

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2225

'only_matching': True,

2226

}, {

2227

'note': 'Storyboards',

2228

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2234

'uploader_id': 'scishow',

2235

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2236

'upload_date': '20140324',

2237

'uploader': 'SciShow',

2238

'like_count': int,

2239

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2240

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2241

'view_count': int,

2242

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2243

'playable_in_embed': True,

2244

'tags': 'count:12',

2245

'uploader_url': 'http://www.youtube.com/user/scishow',

2246

'availability': 'public',

2247

'channel': 'SciShow',

2248

'live_status': 'not_live',

2249

'duration': 248,

2250

'categories': ['Education'],

2251

'age_limit': 0,

2252

'channel_follower_count': int,

2253

'chapters': list,

2254

}, 'params': {'format': 'mhtml', 'skip_download': True}

2255

}, {

2256

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2257

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2262

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2263

'uploader': 'Leon Nguyen',

2264

'uploader_id': 'VNSXIII',

2265

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2266

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2267

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2272

'tags': 'count:23',

2273

'playable_in_embed': True,

2274

'live_status': 'not_live',

2275

'upload_date': '20220103',

2276

'like_count': int,

2277

'availability': 'public',

2278

'channel': 'Leon Nguyen',

2279

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2280

'comment_count': int,

2281

'channel_follower_count': int

2282

}

2283

}, {

2284

# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date

2285

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2290

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2291

'uploader': 'Leon Nguyen',

2292

'uploader_id': 'VNSXIII',

2293

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2294

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2295

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2300

'tags': 'count:23',

2301

'playable_in_embed': True,

2302

'live_status': 'not_live',

2303

'upload_date': '20220102',

2304

'like_count': int,

2305

'availability': 'public',

2306

'channel': 'Leon Nguyen',

2307

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2308

'comment_count': int,

2309

'channel_follower_count': int

2310

},

2311

'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}

2312

}, {

2313

# Date text is that of a premiered video; ensure upload date is in UTC (published 1641172509)

2314

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2319

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2320

'uploader': 'Quackity',

2321

'uploader_id': 'QuackityHQ',

2322

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2323

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2324

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2329

'tags': 'count:26',

2330

'playable_in_embed': True,

2331

'live_status': 'not_live',

2332

'release_timestamp': 1641172509,

2333

'release_date': '20220103',

2334

'upload_date': '20220103',

2335

'like_count': int,

2336

'availability': 'public',

2337

'channel': 'Quackity',

2338

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2339

'channel_follower_count': int

2340

}

2341

},

2342

{ # continuous livestream. Microformat upload date should be preferred.

2343

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2344

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2345

'info_dict': {

2346

'id': 'kgx4WGK0oNU',

2347

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2348

'ext': 'mp4',

2349

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2350

'availability': 'public',

2351

'age_limit': 0,

2352

'release_timestamp': 1637975704,

2353

'upload_date': '20210619',

2354

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2355

'live_status': 'is_live',

2356

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2357

'uploader': '阿鲍Abao',

2358

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2359

'channel': 'Abao in Tokyo',

2360

'channel_follower_count': int,

2361

'release_date': '20211127',

2362

'tags': 'count:39',

2363

'categories': ['People & Blogs'],

2364

'like_count': int,

2365

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2366

'view_count': int,

2367

'playable_in_embed': True,

2368

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2369

'concurrent_view_count': int,

2370

},

2371

'params': {'skip_download': True}

2372

}, {

2373

# Story. Requires specific player params to work.

2374

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2379

'view_count': int,

2380

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2381

'upload_date': '20220526',

2382

'categories': ['Education'],

2383

'title': 'Story',

2384

'channel': 'IT\'S HISTORY',

2385

'description': '',

2386

'uploader_id': 'BlastfromthePast',

2387

'duration': 12,

2388

'uploader': 'IT\'S HISTORY',

2389

'playable_in_embed': True,

2390

'age_limit': 0,

2391

'live_status': 'not_live',

2392

'tags': [],

2393

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2394

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2395

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2396

},

2397

'skip': 'stories get removed after some period of time',

2398

}, {

2399

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2404

'upload_date': '20220323',

2405

'like_count': int,

2406

'availability': 'unlisted',

2407

'channel': 'nao20010128nao',

2408

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2409

'age_limit': 0,

2410

'uploader': 'nao20010128nao',

2411

'uploader_id': 'nao20010128nao',

2412

'categories': ['Music'],

2413

'view_count': int,

2414

'description': '',

2415

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2416

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2417

'live_status': 'not_live',

2418

'playable_in_embed': True,

2419

'channel_follower_count': int,

2420

'duration': 6,

2421

'tags': [],

2422

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

2423

}

2424

}, {

2425

# Prefer primary title+description language metadata by default

2426

# Do not prefer translated description if primary is empty

2427

'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',

'info_dict': {

'id': 'el3E4MbxRqQ',

'ext': 'mp4',

'title': 'dlp test video 2 - primary sv no desc',

2432

'description': '',

2433

'channel': 'cole-dlp-test-acc',

2434

'tags': [],

2435

'view_count': int,

2436

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2437

'like_count': int,

2438

'playable_in_embed': True,

2439

'availability': 'unlisted',

2440

'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',

2441

'age_limit': 0,

2442

'duration': 5,

2443

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2444

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2445

'live_status': 'not_live',

2446

'upload_date': '20220908',

2447

'categories': ['People & Blogs'],

2448

'uploader': 'cole-dlp-test-acc',

2449

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2450

},

2451

'params': {'skip_download': True}

2452

}, {

2453

# Extractor argument: prefer translated title+description

2454

'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',

'info_dict': {

'id': 'gHKT4uU8Zng',

'ext': 'mp4',

'channel': 'cole-dlp-test-acc',

2459

'tags': [],

2460

'duration': 5,

2461

'live_status': 'not_live',

2462

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

2463

'upload_date': '20220728',

2464

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

2465

'view_count': int,

2466

'categories': ['People & Blogs'],

2467

'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',

2468

'title': 'dlp test video title translated (fr)',

2469

'availability': 'public',

2470

'uploader': 'cole-dlp-test-acc',

2471

'age_limit': 0,

2472

'description': 'dlp test video description translated (fr)',

2473

'playable_in_embed': True,

2474

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2475

'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

2476

},

2477

'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},

2478

'expected_warnings': [r'Preferring "fr" translated fields'],

2479

}, {

2480

'note': '6 channel audio',

2481

'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',

2482

'only_matching': True,

}

]

# Test fixtures whose 'url' is a third-party page embedding a YouTube video
# rather than a YouTube URL itself (presumably consumed by the project's
# webpage/embed test harness -- confirm against the test runner).
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        # NOTE(review): this value is identical to the description hash in
        # 'info_dict' below; confirm it is really the intended file checksum
        # and not a copy of the description md5.
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'uploader': 'Christopher Sykes',
            'uploader_id': 'ChristopherJSykes',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            # Fields given as a type (int) only pin the value's type, since
            # the concrete number changes over time.
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        },
        'params': {
            'skip_download': True,
        }
    },
]

@classmethod
def suitable(cls, url):
    """Decide whether this extractor should handle *url*.

    A URL whose first ``list`` query value is non-empty is rejected
    (returns False); every other URL is decided by the parent class.
    """
    # NOTE(review): parse_qs is also imported at module level; the
    # function-scope import is kept exactly as the original had it --
    # confirm it is no longer needed before removing it.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)

2530

2531

def __init__(self, *args, **kwargs):
    """Initialise the parent class, then create two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # Both caches start out as empty dicts.
    self._code_cache, self._player_cache = {}, {}

2535

2536

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):

2537

lock = threading.Lock()

2538

start_time = time.time()

2539

formats = [f for f in formats if f.get('is_from_start')]

2540

2541

def refetch_manifest(format_id, delay):

2542

nonlocal formats, start_time, is_live

2543

if time.time() <= start_time + delay:

2544

return

2545

2546

_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

2547

video_details = traverse_obj(

2548

prs, (..., 'videoDetails'), expected_type=dict, default=[])

2549

microformats = traverse_obj(

2550

prs, (..., 'microformat', 'playerMicroformatRenderer'),

2551

expected_type=dict, default=[])

2552

_, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)

2553

is_live = live_status == 'is_live'

2554

start_time = time.time()

2555

2556

def mpd_feed(format_id, delay):

2557

"""

2558

@returns (manifest_url, manifest_stream_number, is_live) or None

2559

"""

2560

with lock:

2561

refetch_manifest(format_id, delay)

2562

2563

f = next((f for f in formats if f['format_id'] == format_id), None)

2564

if not f:

2565

if not is_live:

2566

self.to_screen(f'{video_id}: Video is no longer live')

2567

else:

2568

self.report_warning(

2569

f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')

2570

return None

2571

return f['manifest_url'], f['manifest_stream_number'], is_live

2572

2573

for f in formats:

2574

f['is_live'] = is_live

2575

gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],

2576

live_start_time, mpd_feed, not is_live and f.copy())

2577

if is_live:

2578

f['fragments'] = gen

2579

f['protocol'] = 'http_dash_segments_generator'

2580

else:

2581

f['fragments'] = LazyList(gen({}))

2582

del f['is_from_start']

2583

2584

def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """
    Generate fragment dicts for a live (or post-live manifestless) DASH format

    Yields dicts with the keys 'url' (the segment URL built from the
    fragment base URL and the sequence number) and 'fragment_count'.

    @param live_start_time       start time of the live stream; compared against
                                 the download start time to detect streams longer
                                 than MAX_DURATION
    @param mpd_feed              callable(format_id, delay) returning
                                 (manifest_url, manifest_stream_number, is_live) or None
    @param manifestless_orig_fmt when truthy, used directly as the format info
                                 instead of downloading the MPD, and only one pass
                                 of the main loop is made
    @param ctx                   dict; 'start' is read as the download start time
                                 and 'last_error' is popped on every manifest refresh
    """
    # FETCH_SPAN: minimum seconds between two passes of the main loop
    # MAX_DURATION: 432000 seconds = 120 hours (see the warning below)
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
        # NOTE(review): redundant, the flag is already truthy in this branch
        lack_early_segments = True

    # known_idx: next sequence number to yield
    # no_fragment_score: penalty counter; generation stops once it exceeds 30
    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """
        Refresh the manifest URL through mpd_feed and, if needed, re-read the fragment list

        @returns (should_continue, last_seq)
        """
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # Ask for a short refetch delay when forced by the caller or after an HTTP 403
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        # When mpd_feed returns None, keep the previous URL and mark the stream as not live
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        # NOTE(review): `last_seq` below is read from the enclosing generator scope.
        # It is first bound by the main loop, so reaching one of these returns during
        # the very first call (e.g. when no formats could be extracted) would raise
        # NameError - confirm whether that can happen in practice
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 2
                return False, last_seq
            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        # The sequence number is embedded in the path of the last fragment as "sq/<number>"
        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                # Header unavailable: penalize and fall back to the MPD on the next pass
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        # Make last_seq exclusive so it can be used as the end of range() below
        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            # Do not start earlier than MAX_DURATION worth of fragments before the head
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # The exception is only used to jump to the `continue` of the while loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                # Nothing new was available in this pass
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        # Wait out the remainder of FETCH_SPAN before polling again
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2694

2695

def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the absolute URL of the player JS referenced by any of the ytcfgs

    Looks for 'PLAYER_JS_URL' first and then for 'jsUrl' inside
    'WEB_PLAYER_CONTEXT_CONFIGS'. Returns None when neither is found.
    """
    lookup_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    relative_url = traverse_obj(ytcfgs, *lookup_paths, get_all=False, expected_type=str)
    if relative_url:
        return urljoin('https://www.youtube.com', relative_url)
    return None

2702

2703

def _download_player_url(self, video_id, fatal=False):

2704

res = self._download_webpage(

2705

'https://www.youtube.com/iframe_api',

2706

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2707

if res:

2708

player_version = self._search_regex(

2709

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2710

if player_version:

2711

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2712

2713

def _signature_cache_id(self, example_sig):

2714

""" Return a string representation of a signature """

2715

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2716

2717

@classmethod

2718

def _extract_player_info(cls, player_url):

2719

for player_re in cls._PLAYER_INFO_RE:

2720

id_m = re.search(player_re, player_url)

if id_m:

break

else:

raise ExtractorError('Cannot identify player %r' % player_url)

2725

return id_m.group('id')

2726

2727

def _load_player(self, video_id, player_url, fatal=True):

2728

player_id = self._extract_player_info(player_url)

2729

if player_id not in self._code_cache:

2730

code = self._download_webpage(

2731

player_url, video_id, fatal=fatal,

2732

note='Downloading player ' + player_id,

2733

errnote='Download of %s failed' % player_url)

2734

if code:

2735

self._code_cache[player_id] = code

2736

return self._code_cache.get(player_id)

2737

2738

def _extract_signature_function(self, video_id, player_url, example_sig):

2739

player_id = self._extract_player_info(player_url)

2740

2741

# Read from filesystem cache

2742

func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'

2743

assert os.path.basename(func_id) == func_id

2744

2745

self.write_debug(f'Extracting signature function {func_id}')

2746

cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None

2747

2748

if not cache_spec:

2749

code = self._load_player(video_id, player_url)

2750

if code:

2751

res = self._parse_sig_js(code)

2752

test_string = ''.join(map(chr, range(len(example_sig))))

2753

cache_spec = [ord(c) for c in res(test_string)]

2754

self.cache.store('youtube-sigfuncs', func_id, cache_spec)

2755

2756

return lambda s: ''.join(s[i] for i in cache_spec)

2757

2758

def _print_sig_code(self, func, example_sig):

2759

if not self.get_param('youtube_print_sig_code'):

2760

return

2761

2762

def gen_sig_code(idxs):

2763

def _genslice(start, end, step):

2764

starts = '' if start == 0 else str(start)

2765

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2766

steps = '' if step == 1 else (':%d' % step)

2767

return f's[{starts}{ends}{steps}]'

2768

2769

step = None

2770

# Quelch pyflakes warnings - start will be set when step is set

2771

start = '(Never used)'

2772

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2777

step = None

2778

continue

2779

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2789

2790

test_string = ''.join(map(chr, range(len(example_sig))))

2791

cache_res = func(test_string)

2792

cache_spec = [ord(c) for c in cache_res]

2793

expr_code = ' + '.join(gen_sig_code(cache_spec))

2794

signature_id_tuple = '(%s)' % (

2795

', '.join(str(len(p)) for p in example_sig.split('.')))

2796

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2797

' return %s\n') % (signature_id_tuple, expr_code)

2798

self.to_screen('Extracted signature function:\n' + code)

2799

2800

def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and return a Python callable for it

    The function name is found with the first matching pattern below and the
    function itself is extracted with JSInterpreter. The returned callable takes
    the signature string as its only argument.
    """
    # Literal parentheses must be written as \( and \). A bare `$` in their place is
    # an end-of-string anchor and makes the pattern unmatchable.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted JS function takes its arguments as a list
    return lambda s: initial_function([s])

2823

2824

def _cached(self, func, *cache_id):

2825

def inner(*args, **kwargs):

2826

if cache_id not in self._player_cache:

2827

try:

2828

self._player_cache[cache_id] = func(*args, **kwargs)

2829

except ExtractorError as e:

2830

self._player_cache[cache_id] = e

2831

except Exception as e:

2832

self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)

2833

2834

ret = self._player_cache[cache_id]

2835

if isinstance(ret, Exception):

raise ret

return ret

return inner

def _decrypt_signature(self, s, video_id, player_url):

2841

"""Turn the encrypted s field into a working signature"""

2842

extract_sig = self._cached(

2843

self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))

2844

func = extract_sig(video_id, player_url, s)

2845

self._print_sig_code(func, s)

2846

return func(s)

2847

2848

def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into a working signature

    The n function code is run with the native JS interpreter first. If that
    raises JSInterpreter.Exception, the same code is executed with PhantomJS.
    When PhantomJS cannot be set up either, the native error is re-raised.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as extraction_error:
        raise ExtractorError('Unable to extract nsig function code', cause=extraction_error)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        native_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = native_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as native_error:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise native_error
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error, only_once=True)

        arg_names, func_body = func_code
        # Wrap the body in an immediately-invoked function and print its result
        script = f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));'
        ret = jsi.execute(script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret

2881

2882

def _extract_n_function_name(self, jscode):

2883

funcname, idx = self._search_regex(

2884

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

2885

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

if not idx:

return funcname

return json.loads(js_to_json(self._search_regex(

2890

rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,

2891

f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]

2892

2893

def _extract_n_function_code(self, video_id, player_url):
    """
    Return (jsi, player_id, func_code) for the n-parameter function of a player

    func_code is a pair (argument names, function body). It is read from the
    'youtube-nsig' cache section when available. Otherwise it is extracted from
    the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    # NOTE(review): on a cache hit the interpreter is built from the cached
    # func_code rather than from the player JS - confirm this is intended
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because JS identifiers may contain `$`, and literal
    # parentheses are written as \( and \) so that they are not read as anchors
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
        # NB: The end of the regex is intentionally kept strict
        {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Normalize to the (argument names, body) shape
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code

2918

2919

def _extract_n_function_from_code(self, jsi, func_code):

2920

func = jsi.extract_function_from_code(*func_code)

def extract_nsig(s):

try:

ret = func([s])

except JSInterpreter.Exception:

2926

raise

2927

except Exception as e:

2928

raise JSInterpreter.Exception(traceback.format_exc(), cause=e)

2929

2930

if ret.startswith('enhanced_except_'):

2931

raise JSInterpreter.Exception('Signature function returned an exception')

return ret

return extract_nsig

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' value of ytcfg is used when present. Otherwise the value is
    searched for in the player JS, which requires player_url.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    player_code = self._load_player(video_id, player_url, fatal=fatal)
    if not player_code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_code,
        'JS player signature timestamp', group='sts', fatal=fatal))

2959

2960

def _mark_watched(self, video_id, player_responses):
    """
    Request the playback tracking URLs of the video so that it is marked as watched

    The playback URL is requested first, then the watchtime URL. Processing
    stops with a warning as soon as one of the URLs is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    cpn_alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        base_url = get_first(
            player_responses, ('playbackTracking', key, 'baseUrl'), expected_type=url_or_none)
        if not base_url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        url_parts = urllib.parse.urlparse(base_url)
        params = urllib.parse.parse_qs(url_parts.query)

        cpn = ''.join(cpn_alphabet[random.randint(0, 256) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        reported_length = params.get('len') or ['1.5']
        video_length = [str(float(reported_length[0]) - 1)]

        params['ver'] = ['2']
        params['cpn'] = [cpn]
        params['cmt'] = video_length
        params['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            params['st'] = 0
            params['et'] = video_length

        tracking_url = urllib.parse.urlunparse(
            url_parts._replace(query=urllib.parse.urlencode(params, True)))

        self._download_webpage(
            tracking_url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

3000

3001

@classmethod
def _extract_from_webpage(cls, url, webpage):
    """
    Yield url results for the YouTube videos referenced by a webpage

    An Invidious-style alternate link takes precedence: when present, only that
    video is yielded and extraction is stopped.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazyyt_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for raw_id in lazyyt_ids:
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_players = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_players:
        # findall returns one tuple per match; the video id is the last group
        embedded_id = groups[-1]
        yield cls.url_result(embedded_id, cls, embedded_id)

3024

3025

@classmethod
def extract_id(cls, url):
    """Return the video id of url, raising ExtractorError when none can be found"""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

3031

3032

def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar renderer found in data"""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # The start is given in milliseconds
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_seconds, chapter_title=title_of, duration=duration)

3046

3047

def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Extract chapters from the macro markers lists of the engagement panels in data

    Returns the chapters of the first list that yields any, or an empty list.
    """
    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in marker_lists:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

3059

3060

def _extract_chapters_from_description(self, description, duration):

3061

duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'

3062

sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'

3063

return self._extract_chapters(

3064

re.findall(sep_re % (duration_re, r'.+?'), description or ''),

3065

chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],

3066

duration=duration, strict=False) or self._extract_chapters(

3067

re.findall(sep_re % (r'.+?', duration_re), description or ''),

3068

chapter_time=lambda x: parse_duration(x[1]), chapter_title=lambda x: x[0],

3069

duration=duration, strict=False)

3070

3071

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

3076

'title': chapter_title(chapter),

3077

} for chapter in chapter_list or []]

3078

if not strict:

3079

chapter_list.sort(key=lambda c: c['start_time'] or 0)

3080

3081

chapters = [{'start_time': 0}]

3082

for idx, chapter in enumerate(chapter_list):

3083

if chapter['start_time'] is None:

3084

self.report_warning(f'Incomplete chapter {idx}')

3085

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

3086

chapters.append(chapter)

3087

elif chapter not in chapters:

3088

self.report_warning(

3089

f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')

3090

return chapters[1:]

3091

3092

def _extract_comment(self, comment_renderer, parent=None):
    """
    Convert a comment renderer into a comment dict

    Returns None when the renderer has no 'commentId'. 'parent' is the id of the
    parent comment, or 'root' when none is given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    def lookup(getter, expected_type):
        return try_get(comment_renderer, getter, expected_type)

    comment = {'id': comment_id}
    comment['text'] = self._get_text(comment_renderer, 'contentText')

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    comment['timestamp'] = self._parse_time_text(time_text)
    comment['time_text'] = time_text

    author = self._get_text(comment_renderer, 'authorText')
    author_id = lookup(lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    vote_text = lookup((lambda x: x['voteCount']['simpleText'],
                        lambda x: x['likeCount']), str)
    comment['like_count'] = parse_count(vote_text) or 0

    author_thumbnail = lookup(lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
    author_is_uploader = lookup(lambda x: x['authorIsChannelOwner'], bool)

    action_buttons = lookup(lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    comment['is_favorited'] = 'creatorHeart' in action_buttons

    comment['author'] = author
    comment['author_id'] = author_id
    comment['author_thumbnail'] = author_thumbnail
    comment['author_is_uploader'] = author_is_uploader
    comment['parent'] = parent or 'root'
    return comment

3128

3129

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generate comment dicts by following the comment continuations page by page

    The method calls itself for the replies of each comment thread, passing the
    id of the parent comment as `parent` and sharing `tracker` between calls.

    @param root_continuation_data  data the initial continuation is extracted from
    @param parent                  id of the parent comment when extracting replies,
                                   None for the top-level comment section
    @param tracker                 dict of counters shared across recursive calls;
                                   created here when not given
    Raises self.CommentsDisabled when a message renderer is present in
    root_continuation_data and no comment was extracted at the top level.
    """

    # First value of an extractor argument, or '' when it is not given
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Find the continuation of the requested sort order in the comments header"""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments of each thread in contents, followed by their replies"""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield produces None, which the caller below treats as "stop"
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Limit replies by both the per-thread maximum and the remaining overall reply budget
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # The max_comments argument holds up to four values; the list is padded with ''
    # so that the unpacking always succeeds (presumably '' maps to sys.maxsize, i.e.
    # "no limit" - verify against int_or_none). `max_comments` itself is not used
    # anywhere in this method
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
                self.report_warning(
                    'Received incomplete data for a comment reply thread and retrying did not help. '
                    'Ignoring to let other comments be downloaded.')
            else:
                raise
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section is only searched for the sort continuation,
                # no comments are extracted from it
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # None from extract_thread: the parent comment limit was reached
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled

3285

3286

@staticmethod

3287

def _generate_comment_continuation(video_id):

3288

"""

3289

Generates initial comment section continuation token from given video id

3290

"""

3291

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

3292

return base64.b64encode(token.encode()).decode()

3293

3294

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Returns an iterator over the comments of the 'comment-item-section' found
    in contents, limited to the first value of the max_comments argument.
    """
    def iter_comments(section_contents):
        sections = traverse_obj(section_contents, (..., 'itemSectionRenderer'), default={})
        comment_section = None
        for section in sections:
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(contents), 0, limit)

3304

3305

@staticmethod

3306

def _get_checkok_params():

3307

return {'contentCheckOk': True, 'racyCheckOk': True}

3308

3309

@classmethod

3310

def _generate_player_context(cls, sts=None):

3311

context = {

3312

'html5Preference': 'HTML5_PREF_WANTS',

3313

}

3314

if sts is not None:

3315

context['signatureTimestamp'] = sts

3316

return {

3317

'playbackContext': {

3318

'contentPlaybackContext': context

3319

},

3320

**cls._get_checkok_params()

}

@staticmethod
def _is_agegated(player_response):
    """
    Tell whether the playability status of a player response indicates an age gate

    True when 'desktopLegacyAgeGateReason' is set, or when the status or reason
    contains one of the known age gate markers.
    """
    age_gate_markers = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for marker in age_gate_markers:
        for reason in reasons:
            if marker in reason:
                return True
    return False

3334

3335

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of a player response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3338

3339

# Value sent as the 'params' field of the player API query for story videos
# and for the android client (see _extract_player_response)
_STORY_PLAYER_PARAMS = '8AEB'

3340

3341

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):

3342

3343

session_index = self._extract_session_index(player_ytcfg, master_ytcfg)

3344

syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

3345

sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None

3346

headers = self.generate_api_headers(

3347

ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

yt_query = {

'videoId': video_id,

}

if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':

3353

yt_query['params'] = self._STORY_PLAYER_PARAMS

3354

3355

yt_query.update(self._generate_player_context(sts))

3356

return self._extract_response(

3357

item_id=video_id, ep='player', query=yt_query,

3358

ytcfg=player_ytcfg, headers=headers, fatal=True,

3359

default_client=client,

3360

note='Downloading %s player API JSON' % client.replace('_', ' ').strip()

3361

) or None

3362

3363

def _get_requested_clients(self, url, smuggled_data):
    """Work out which innertube clients should be queried for a video.

    The `player_client` extractor argument is read entry by entry: a known
    public client name is taken as-is, `default` expands to the default
    clients and `all` to every public client (highest priority first).
    Anything else is skipped with a warning. When nothing usable was
    requested, the default clients are used.

    For music URLs, the `<client>_music` counterpart of every selected client
    is added when such a client exists.

    @param url            URL being extracted; checked with `is_music_url`
    @param smuggled_data  Smuggled data; `is_music_url` forces music clients
    @returns              The selected clients passed through `orderedSet`
    """
    requested_clients = []
    default = ['android', 'web']
    # Clients whose name starts with "_" cannot be requested by the user
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions from a snapshot first: feeding `extend` with a
        # generator over the very list it is growing would also visit the
        # freshly appended "*_music" entries
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3386

3387

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect the player responses of *video_id* from every requested client.

    Clients are queried in the given order. Extra clients may be queued while
    iterating (creator/embedded variants) depending on the playability of the
    responses received so far.

    @param clients        Client names to query, in order
    @param video_id       ID of the video
    @param webpage        Watch page HTML, or a falsy value if unavailable
    @param master_ytcfg   ytcfg extracted from the webpage (or the default one)
    @param smuggled_data  Smuggled data forwarded to the player request
    @returns              Tuple (list of player responses, player URL or None)
    @raises ExtractorError  The last request error, if no response was obtained
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    known_clients = set(clients)
    # Used as a stack: reversed so that pop() hands out clients in the given order
    pending = clients[::-1]
    responses = []

    def append_client(*client_names):
        """Queue the first name whose client exists, unless that client is already known"""
        for candidate in client_names:
            candidate_client = _split_innertube_client(candidate)[0]
            if candidate_client not in INNERTUBE_CLIENTS:
                continue
            if candidate_client not in known_clients:
                pending.append(candidate)
            known_clients.add(candidate_client)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        responses.append({**initial_pr, 'streamingData': None})

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client, base_client, variant = _split_innertube_client(pending.pop())
        is_web = client == 'web'

        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        if not player_url:
            player_url = self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # Fall back to downloading the player URL, but only once
        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; older ones are downgraded to warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if not pr_video_id or pr_video_id == video_id:
                responses.append(pr)
            else:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not responses:
            raise last_error
        self.report_warning(last_error)
    return responses, player_url

3468

3469

def _needs_live_processing(self, live_status, duration):

3470

if (live_status == 'is_live' and self.get_param('live_from_start')

3471

or live_status == 'post_live' and (duration or 0) > 4 * 3600):

3472

return live_status

3473

3474

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Yield the format dicts of a video, followed by its manifest subtitles.

    This is a generator: it first yields one dict per direct (https) format
    listed under `formats`/`adaptiveFormats`, then the formats found in the
    HLS and DASH manifests, and finally - as the very last item - the dict of
    subtitles merged from those manifests.

    @param streaming_data  Iterable of `streamingData` dicts
    @param video_id        ID of the video
    @param player_url      URL of the JS player, needed to decrypt the
                           signature and the "n" parameter; may be None
    @param live_status     Live status string of the video (or None)
    @param duration        Duration in seconds (or None), used to detect
                           damaged formats and the need for live processing
    """
    # itags maps a format id to the protocol it was first seen with;
    # stream_ids lists the "<itag>.<audio track id>" pairs already emitted
    itags, stream_ids = {}, []
    # Quality names remembered per itag and per height, for manifest formats
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null value for audioQuality
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is kept, but marked and deprioritized below
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality may still be None here when the format carries
                # neither "quality" nor "audioQuality"
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, itag):
        """Assign format id and quality to manifest format *f*; False means "duplicate, drop it" """
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen with another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Unknown itag: borrow the quality of the closest known height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The subtitles are always the last item produced by this generator
    yield subtitles

def _extract_storyboard(self, player_responses, duration):
    """Yield one `mhtml` format dict per storyboard of the video.

    The storyboard spec is a "|"-separated string: its first item is the
    base URL template and every further item describes one storyboard as 8
    "#"-separated fields, of which the first five are width, height, frame
    count, columns and rows, and the last two fill in `$N` and the `sigh`
    query parameter.

    @param player_responses  Player responses searched for the spec
    @param duration          Duration of the video in seconds; storyboards
                             are skipped when it is unknown (None/0), since
                             fps and fragment durations cannot be computed
    """
    # Reversed so that the base URL can be popped off the end of the list
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        if not duration:
            # Dividing by a missing/zero duration would abort the whole extraction
            self.write_debug('Skipping storyboards since the video duration is unknown', only_once=True)
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # The list was reversed above, so L - i restores the original index
        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a grid of cols * rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3715

3716

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    @param url            Original URL, used to select the clients
    @param smuggled_data  Smuggled data; `is_story` adds the story params
    @param video_id       ID of the video
    @param webpage_url    URL of the watch page to download
    @returns              Tuple (webpage, master_ytcfg, player_responses, player_url)
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    if skip_webpage:
        webpage = None
    else:
        webpage_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            webpage_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(webpage_url, video_id, fatal=False, query=webpage_query)

    # Fall back to the default ytcfg when none could be extracted
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url

3732

3733

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status of the video and extract its formats.

    @returns  Tuple (live_broadcast_details, live_status, streaming_data,
              formats, subtitles), where live_status is one of 'post_live',
              'is_live', 'is_upcoming', 'was_live', 'not_live' or None
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # First matching state wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator yields every format first and the subtitles as its last item
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    return live_broadcast_details, live_status, streaming_data, formats, subtitles

3751

3752

def _real_extract(self, url):

3753

url, smuggled_data = unsmuggle_url(url, {})

3754

video_id = self._match_id(url)

3755

3756

base_url = self.http_scheme() + '//www.youtube.com/'

3757

webpage_url = base_url + 'watch?v=' + video_id

3758

3759

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3760

3761

playability_statuses = traverse_obj(

3762

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3763

3764

trailer_video_id = get_first(

3765

playability_statuses,

3766

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3767

expected_type=str)

3768

if trailer_video_id:

3769

return self.url_result(

3770

trailer_video_id, self.ie_key(), trailer_video_id)

3771

3772

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3773

if webpage else (lambda x: None))

3774

3775

video_details = traverse_obj(

3776

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3777

microformats = traverse_obj(

3778

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3779

expected_type=dict, default=[])

3780

3781

translated_title = self._get_text(microformats, (..., 'title'))

3782

video_title = (self._preferred_lang and translated_title

3783

or get_first(video_details, 'title') # primary

3784

or translated_title

3785

or search_meta(['og:title', 'twitter:title', 'title']))

3786

translated_description = self._get_text(microformats, (..., 'description'))

3787

original_description = get_first(video_details, 'shortDescription')

3788

video_description = (

3789

self._preferred_lang and translated_description

3790

# If original description is blank, it will be an empty string.

3791

# Do not prefer translated description in this case.

3792

or original_description if original_description is not None else translated_description)

3793

3794

multifeed_metadata_list = get_first(

3795

player_responses,

3796

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3797

expected_type=str)

3798

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3799

if self.get_param('noplaylist'):

3800

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3805

# Unquote should take place before split on comma (,) since textual

3806

# fields may contain comma as well (see

3807

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3808

feed_data = urllib.parse.parse_qs(

3809

urllib.parse.unquote_plus(feed))

3810

3811

def feed_entry(name):

3812

return try_get(

3813

feed_data, lambda x: x[name][0], str)

3814

3815

feed_id = feed_entry('id')

3816

if not feed_id:

3817

continue

3818

feed_title = feed_entry('title')

3819

title = video_title

3820

if feed_title:

3821

title += ' (%s)' % feed_title

3822

entries.append({

3823

'_type': 'url_transparent',

3824

'ie_key': 'Youtube',

3825

'url': smuggle_url(

3826

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3827

{'force_singlefeed': True}),

3828

'title': title,

3829

})

3830

feed_ids.append(feed_id)

3831

self.to_screen(

3832

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3833

% (', '.join(feed_ids), video_id))

3834

return self.playlist_result(

3835

entries, video_id, video_title, video_description)

3836

3837

duration = (int_or_none(get_first(video_details, 'lengthSeconds'))

3838

or int_or_none(get_first(microformats, 'lengthSeconds'))

3839

or parse_duration(search_meta('duration')) or None)

3840

3841

live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \

3842

self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

3843

if live_status == 'post_live':

3844

self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

3845

3846

if not formats:

3847

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3848

self.report_drm(video_id)

3849

pemr = get_first(

3850

playability_statuses,

3851

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3852

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3853

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3854

if subreason:

3855

if subreason == 'The uploader has not made this video available in your country.':

3856

countries = get_first(microformats, 'availableCountries')

3857

if not countries:

3858

regions_allowed = search_meta('regionsAllowed')

3859

countries = regions_allowed.split(',') if regions_allowed else None

3860

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3861

reason += f'. {subreason}'

3862

if reason:

3863

self.raise_no_formats(reason, expected=True)

3864

3865

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3866

if not keywords and webpage:

3867

keywords = [

3868

unescapeHTML(m.group('content'))

3869

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3870

for keyword in keywords:

3871

if keyword.startswith('yt:stretch='):

3872

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3873

if mobj:

3874

# NB: float is intentional for forcing float division

3875

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3880

f['stretched_ratio'] = ratio

3881

break

3882

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3883

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3884

if thumbnail_url:

3885

thumbnails.append({

3886

'url': thumbnail_url,

3887

})

3888

original_thumbnails = thumbnails.copy()

3889

3890

# The best resolution thumbnails sometimes does not appear in the webpage

3891

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3892

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3893

thumbnail_names = [

3894

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

3895

# in resolution, these are not the custom thumbnail. So de-prioritize them

3896

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3897

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3898

]

3899

n_thumbnail_names = len(thumbnail_names)

3900

thumbnails.extend({

3901

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3902

video_id=video_id, name=name, ext=ext,

3903

webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),

3904

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3905

for thumb in thumbnails:

3906

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3907

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3908

self._remove_duplicate_formats(thumbnails)

3909

self._downloader._sort_thumbnails(original_thumbnails)

3910

3911

category = get_first(microformats, 'category') or search_meta('genre')

3912

channel_id = str_or_none(

3913

get_first(video_details, 'channelId')

3914

or get_first(microformats, 'externalChannelId')

3915

or search_meta('channelId'))

3916

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3917

3918

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3919

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3920

if not duration and live_end_time and live_start_time:

3921

duration = live_end_time - live_start_time

3922

3923

needs_live_processing = self._needs_live_processing(live_status, duration)

3924

3925

def is_bad_format(fmt):

3926

if needs_live_processing and not fmt.get('is_from_start'):

3927

return True

3928

elif (live_status == 'is_live' and needs_live_processing != 'is_live'

3929

and fmt.get('protocol') == 'http_dash_segments'):

3930

return True

3931

3932

for fmt in filter(is_bad_format, formats):

3933

fmt['preference'] = (fmt.get('preference') or -1) - 10

3934

fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')

3935

3936

if needs_live_processing:

3937

self._prepare_live_from_start_formats(

3938

formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

3939

3940

formats.extend(self._extract_storyboard(player_responses, duration))

3941

3942

# source_preference is lower for throttled/potentially damaged formats

3943

self._sort_formats(formats, (

3944

'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3949

'formats': formats,

3950

'thumbnails': thumbnails,

3951

# The best thumbnail that we are sure exists. Prevents unnecessary

3952

# URL checking if user don't care about getting the best possible thumbnail

3953

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3954

'description': video_description,

3955

'uploader': get_first(video_details, 'author'),

3956

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3957

'uploader_url': owner_profile_url,

3958

'channel_id': channel_id,

3959

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

3960

'duration': duration,

3961

'view_count': int_or_none(

3962

get_first((video_details, microformats), (..., 'viewCount'))

3963

or search_meta('interactionCount')),

3964

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3965

'age_limit': 18 if (

3966

get_first(microformats, 'isFamilySafe') is False

3967

or search_meta('isFamilyFriendly') == 'false'

3968

or search_meta('og:restrictions:age') == '18+') else 0,

3969

'webpage_url': webpage_url,

3970

'categories': [category] if category else None,

3971

'tags': keywords,

3972

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3973

'live_status': live_status,

3974

'release_timestamp': live_start_time,

}

subtitles = {}

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3979

if pctr:

3980

def get_lang_code(track):

3981

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3982

or track.get('languageCode'))

3983

3984

# Converted into dicts to remove duplicates

3985

captions = {

3986

get_lang_code(sub): sub

3987

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3988

translation_languages = {

3989

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3990

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3991

3992

def process_language(container, base_url, lang_code, sub_name, query):

3993

lang_subs = container.setdefault(lang_code, [])

3994

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

# NB: Constructing the full subtitle dictionary is slow

4005

get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (

4006

self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

4007

for lang_code, caption_track in captions.items():

4008

base_url = caption_track.get('baseUrl')

4009

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

4010

if not base_url:

4011

continue

4012

lang_name = self._get_text(caption_track, 'name', max_runs=1)

4013

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

4018

if not caption_track.get('isTranslatable'):

4019

continue

4020

for trans_code, trans_name in translation_languages.items():

4021

if not trans_code:

4022

continue

4023

orig_trans_code = trans_code

4024

if caption_track.get('kind') != 'asr':

4025

if not get_translated_subs:

4026

continue

4027

trans_code += f'-{lang_code}'

4028

trans_name += format_field(lang_name, None, ' from %s')

4029

# Add an "-orig" label to the original language so that it can be distinguished.

4030

# The subs are returned without "-orig" as well for compatibility

4031

if lang_code == f'a-{orig_trans_code}':

4032

process_language(

4033

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

4034

# Setting tlang=lang returns damaged subtitles.

4035

process_language(automatic_captions, base_url, trans_code, trans_name,

4036

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

4037

4038

info['automatic_captions'] = automatic_captions

4039

info['subtitles'] = subtitles

4040

4041

parsed_url = urllib.parse.urlparse(url)

4042

for component in [parsed_url.fragment, parsed_url.query]:

4043

query = urllib.parse.parse_qs(component)

4044

for k, v in query.items():

4045

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

4046

d_k += '_time'

4047

if d_k not in info and k in s_ks:

4048

info[d_k] = parse_duration(query[k][0])

4049

4050

# Youtube Music Auto-generated description

4051

if video_description:

4052

mobj = re.search(

4053

r'''(?xs)

4054

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

4055

(?P<album>[^\n]+)

4056

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

4057

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

4058

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

4059

.+\nAuto-generated\ by\ YouTube\.\s*$

4060

''', video_description)

4061

if mobj:

4062

release_year = mobj.group('release_year')

4063

release_date = mobj.group('release_date')

4064

if release_date:

4065

release_date = release_date.replace('-', '')

4066

if not release_year:

4067

release_year = release_date[:4]

4068

info.update({

4069

'album': mobj.group('album'.strip()),

4070

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

4071

'track': mobj.group('track').strip(),

4072

'release_date': release_date,

4073

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

4079

if not initial_data:

4080

query = {'videoId': video_id}

4081

query.update(self._get_checkok_params())

4082

initial_data = self._extract_response(

4083

item_id=video_id, ep='next', fatal=False,

4084

ytcfg=master_ytcfg, query=query,

4085

headers=self.generate_api_headers(ytcfg=master_ytcfg),

4086

note='Downloading initial data API JSON')

4087

4088

info['comment_count'] = traverse_obj(initial_data, (

4089

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

4090

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

4091

), (

4092

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

4093

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

4094

), expected_type=int_or_none, get_all=False)

4095

4096

try: # This will error if there is no livechat

4097

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

4098

except (KeyError, IndexError, TypeError):

4099

pass

4100

else:

4101

info.setdefault('subtitles', {})['live_chat'] = [{

4102

# url is needed to set cookies

4103

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

4104

'video_id': video_id,

4105

'ext': 'json',

4106

'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')

4107

else 'youtube_live_chat_replay'),

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

4113

or self._extract_chapters_from_engagement_panel(initial_data, duration)

4114

or self._extract_chapters_from_description(video_description, duration)

4115

or None)

4116

4117

contents = traverse_obj(

4118

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

4119

expected_type=list, default=[])

4120

4121

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

4122

if vpir:

4123

stl = vpir.get('superTitleLink')

4124

if stl:

4125

stl = self._get_text(stl)

4126

if try_get(

4127

vpir,

4128

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

4129

info['location'] = stl

4130

else:

4131

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

4132

if mobj:

4133

info.update({

4134

'series': mobj.group(1),

4135

'season_number': int(mobj.group(2)),

4136

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

list) or []):

tbrs = variadic(

traverse_obj(

tlb, 'toggleButtonRenderer',

4145

('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),

4146

default=[]))

4147

for tbr in tbrs:

4148

for getter, regex in [(

4149

lambda x: x['defaultText']['accessibility']['accessibilityData'],

4150

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

4151

lambda x: x['accessibility'],

4152

lambda x: x['accessibilityData']['accessibilityData'],

4153

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

4154

label = (try_get(tbr, getter, dict) or {}).get('label')

4155

if label:

4156

mobj = re.match(regex, label)

4157

if mobj:

4158

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

4159

break

4160

sbr_tooltip = try_get(

4161

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

4162

if sbr_tooltip:

4163

like_count, dislike_count = sbr_tooltip.split(' / ')

4164

info.update({

4165

'like_count': str_to_int(like_count),

4166

'dislike_count': str_to_int(dislike_count),

4167

})

4168

vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))

4169

if vcr:

4170

vc = self._get_count(vcr, 'viewCount')

4171

# Upcoming premieres with waiting count are treated as live here

4172

if vcr.get('isLive'):

4173

info['concurrent_view_count'] = vc

4174

elif info.get('view_count') is None:

4175

info['view_count'] = vc

4176

4177

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

4178

if vsir:

4179

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

4180

info.update({

4181

'channel': self._get_text(vor, 'title'),

4182

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

4187

list) or []

4188

multiple_songs = False

4189

for row in rows:

4190

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

4191

multiple_songs = True

4192

break

4193

for row in rows:

4194

mrr = row.get('metadataRowRenderer') or {}

4195

mrr_title = mrr.get('title')

4196

if not mrr_title:

4197

continue

4198

mrr_title = self._get_text(mrr, 'title')

4199

mrr_contents_text = self._get_text(mrr, ('contents', 0))

4200

if mrr_title == 'License':

4201

info['license'] = mrr_contents_text

4202

elif not multiple_songs:

4203

if mrr_title == 'Album':

4204

info['album'] = mrr_contents_text

4205

elif mrr_title == 'Artist':

4206

info['artist'] = mrr_contents_text

4207

elif mrr_title == 'Song':

4208

info['track'] = mrr_contents_text

4209

4210

fallbacks = {

4211

'channel': 'uploader',

4212

'channel_id': 'uploader_id',

4213

'channel_url': 'uploader_url',

4214

}

4215

4216

# The upload date for scheduled, live and past live streams / premieres in microformats

4217

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

4218

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

4219

upload_date = (

4220

unified_strdate(get_first(microformats, 'uploadDate'))

4221

or unified_strdate(search_meta('uploadDate')))

4222

if not upload_date or (

4223

live_status in ('not_live', None)

4224

and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])

4225

):

4226

upload_date = strftime_or_none(

4227

self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date

4228

info['upload_date'] = upload_date

4229

4230

for to, frm in fallbacks.items():

4231

if not info.get(to):

4232

info[to] = info.get(frm)

4233

4234

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))

4240

4241

is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)

4242

or get_first(video_details, 'isPrivate', expected_type=bool))

4243

4244

info['availability'] = (

4245

'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)

4246

else self._availability(

4247

is_private=is_private,

4248

needs_premium=(

4249

self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)

4250

or False if initial_data and is_private is not None else None),

4251

needs_subscription=(

4252

self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)

4253

or False if initial_data and is_private is not None else None),

4254

needs_auth=info['age_limit'] >= 18,

4255

is_unlisted=None if is_private is None else (

4256

self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)

4257

or get_first(microformats, 'isUnlisted', expected_type=bool))))

4258

4259

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

4260

4261

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

4267

@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method so smuggled URL data is carried through.

    The wrapped ``func`` is invoked as ``func(self, url, smuggled_data)``,
    where ``smuggled_data`` is whatever ``unsmuggle_url`` unpacked from the
    incoming URL (plus ``is_music_url`` when ``self.is_music_url(url)`` is
    true). Any data still present afterwards is re-attached to the returned
    result, and lazily to each of its ``entries``, whenever that result is of
    type ``url`` or ``url_transparent``.
    """
    redirectable_hosts = ('www.youtube.com', 'music.youtube.com')

    def _smuggle(info, smuggled_data):
        # Only results that point at another URL can carry smuggled data
        if info.get('_type') in ('url', 'url_transparent'):
            if smuggled_data.get('is_music_url'):
                parts = urllib.parse.urlparse(info['url'])
                if parts.netloc in redirectable_hosts:
                    # The music flag is expressed through the hostname instead
                    smuggled_data.pop('is_music_url')
                    info['url'] = urllib.parse.urlunparse(parts._replace(netloc='music.youtube.com'))
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        _smuggle(result, smuggled_data)
        if result.get('entries'):
            # Every entry receives its own copy because _smuggle may pop keys
            result['entries'] = (_smuggle(entry, smuggled_data.copy()) for entry in result['entries'])
        return result

    return wrapper

def _extract_channel_id(self, webpage):
    """Return the channel id found in the meta tags of ``webpage``.

    The ``channelId`` meta tag is preferred. When it is absent, the id is
    parsed out of the first available canonical/app URL meta tag, which is
    expected to look like ``https://www.youtube.com/channel/<id>``.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: with "(...)+" a repeated
    # group only keeps its last iteration, i.e. a single character of the id
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')

4306

4307

@staticmethod

4308

def _extract_basic_item_renderer(item):

4309

# Modified from _extract_grid_item_renderer

4310

known_basic_renderers = (

4311

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

4312

)

4313

for key, renderer in item.items():

4314

if not isinstance(renderer, dict):

4315

continue

4316

elif key in known_basic_renderers:

4317

return renderer

4318

elif key.startswith('grid') and key.endswith('Renderer'):

4319

return renderer

4320

4321

def _grid_entries(self, grid_renderer):
    """Yield one result per usable item in ``grid_renderer['items']``.

    Each item is resolved to its basic renderer and then dispatched, in
    order of precedence, on ``playlistId``, ``videoId``, ``channelId`` or a
    generic navigation endpoint URL. Items that match none are skipped.
    """
    for item in grid_renderer['items']:
        renderer = self._extract_basic_item_renderer(item) if isinstance(item, dict) else None
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(renderer)
        elif channel_id:
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # The first extractor that accepts the URL wins
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if ie.suitable(ep_url):
                    yield self.url_result(
                        ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                    break

4360

4361

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for a responsive list item.

    Tries, in order: the item's own video id, the playlist (optionally with
    a starting video) of its watch endpoint, and the browse id of its browse
    endpoint. Returns ``None`` when none of them is present.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={item_video_id}',
                               ie=YoutubeIE.ie_key(), video_id=item_video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)

4378

4379

def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the entries embedded directly in a shelf's ``content``.

    Only ``gridRenderer`` and ``expandedShelfContentsRenderer`` contents are
    handled; anything else yields nothing.
    """
    content = shelf_renderer.get('content')
    if not isinstance(content, dict):
        return
    grid = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
    if grid:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        yield from self._grid_entries(grid)
    # TODO: content['horizontalListRenderer'] is not handled yet

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf itself, then its inline entries.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?`` is
    dropped entirely (nothing is yielded).
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels; note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

4409

4410

def _playlist_entries(self, video_list_renderer):
    """Yield an extracted video for every playlist item that has a video id.

    Items are read from ``video_list_renderer['contents']``; both
    ``playlistVideoRenderer`` and ``playlistPanelVideoRenderer`` are accepted.
    """
    for content in video_list_renderer['contents']:
        if not isinstance(content, dict):
            continue
        renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
        if isinstance(renderer, dict) and renderer.get('videoId'):
            yield self._extract_video(renderer)

4421

4422

def _rich_entries(self, rich_grid_renderer):
    """Yield the video held by a rich item, if it has a video id.

    The video is looked up under ``content.videoRenderer`` or
    ``content.reelItemRenderer``, whichever is found first.
    """
    video_path = ('content', ('videoRenderer', 'reelItemRenderer'))
    renderer = traverse_obj(rich_grid_renderer, video_path, get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)

4429

4430

def _video_entry(self, video_renderer):
    """Return the extracted video for ``video_renderer``, or ``None`` without a video id."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)

4434

4435

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for a hashtag tile, or ``None`` when it has no usable URL."""
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_path)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4441

4442

def _post_thread_entries(self, post_thread_renderer):
    """Yield everything playable that a community post refers to.

    That is, in order: the attached video, the attached playlist, and every
    video URL linked from the post text (except a link to the attached video
    itself).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video was already yielded above
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)

4477

4478

def _post_thread_continuation_entries(self, post_thread_continuation):

4479

contents = post_thread_continuation.get('contents')

4480

if not isinstance(contents, list):

4481

return

4482

for content in contents:

4483

renderer = content.get('backstagePostThreadRenderer')

4484

if isinstance(renderer, dict):

4485

yield from self._post_thread_entries(renderer)

4486

continue

4487

renderer = content.get('videoRenderer')

4488

if isinstance(renderer, dict):

4489

yield self._video_entry(renderer)

4490

4491

r''' # unused

4492

def _rich_grid_entries(self, contents):

4493

for content in contents:

4494

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4495

if video_renderer:

4496

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _report_history_entries(self, renderer):
    """Yield a video URL result for every link found in a report-history table."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, link_path):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)

4507

4508

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield all entries below ``parent_renderer`` and report the continuation.

    ``continuation_list`` is an output parameter: it is reset to ``[None]``
    up front and its single slot is overwritten with whatever
    ``_extract_continuation`` finds while the contents are walked. The most
    specific renderer is consulted first, then the enclosing section and
    finally ``parent_renderer`` itself.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps the renderer key of a section item to a callable returning its entries
    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)

        if not section:
            if content.get('richItemRenderer'):
                yield from self._rich_entries(content['richItemRenderer'])
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            for key, renderer in section_item.items():
                handler = handlers.get(key)
                if handler is None:
                    continue
                # Handlers may produce None for unusable items
                yield from (entry for entry in handler(renderer) if entry)
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first recognised renderer of an item is processed
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4560

4561

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield every entry of ``tab``, following continuations page by page.

    The entries embedded in the tab content are yielded first. After that,
    continuation pages are requested for as long as a continuation token is
    available and the response contains a renderer this method knows about.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Looks up continuation_list at call time, so it follows the
        # rebinding done for every continuation page below
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Renderer key -> (entry generator, key to wrap the continuation items in, if any)
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }
    response_keys = ('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=response_keys)
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        # The first item (or the container itself) decides how the page is parsed
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item.keys():
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        # Nothing recognisable on this page: stop instead of looping forever
        if not video_items_renderer:
            break

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4626

for tab_renderer in tabs:

4627

if tab_renderer.get('selected'):

4628

return tab_renderer

4629

if fatal:

4630

raise ExtractorError('Unable to find selected tab')

4631

4632

@staticmethod
def _extract_tab_renderers(response):
    """Collect the ``tabRenderer``/``expandableTabRenderer`` dicts of a browse response."""
    tab_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_path, expected_type=dict)

4636

4637

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a tabbed page.

    The playlist title is the page title followed by the selected tab's
    ``title`` and ``expandedText``, each appended as `` - <value>`` through
    ``format_field``.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)

    selected_tab = self._extract_selected_tab(tabs)
    for field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, field, ' - %s')

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    entries = self._entries(selected_tab, metadata['id'], ytcfg, account_syncid, visitor_data)
    return self.playlist_result(entries, **metadata)

4650

4651

def _extract_metadata_from_tabs(self, item_id, data):
    """Assemble the playlist-level metadata dict for a tabbed page.

    Channel metadata is preferred; playlist metadata is the fallback. The
    result always carries ``id``, ``title``, ``uploader*`` and ``channel*``
    keys (values may be ``None``) besides thumbnails, counts and dates.
    """
    info = {'id': item_id}

    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if not metadata_renderer:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
    else:
        info['uploader'] = metadata_renderer.get('title')
        info['uploader_id'] = metadata_renderer.get('externalId')
        info['uploader_url'] = metadata_renderer.get('channelUrl')
        if info['uploader_id']:
            info['id'] = info['uploader_id']

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _with_uncropped(thumbnails, thumbnail_id, preference):
        # Appends the uncropped variant of the first thumbnail, when one can be derived
        if thumbnails:
            uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
            if uncropped_url:
                thumbnails.append({
                    'url': uncropped_url,
                    'id': thumbnail_id,
                    'preference': preference
                })
        return thumbnails

    avatar_thumbnails = _with_uncropped(
        self._extract_thumbnails(metadata_renderer, 'avatar'), 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    for banner in channel_banners:
        banner['preference'] = -10
    _with_uncropped(channel_banners, 'banner_uncropped', -5)

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info['title'] = (
        traverse_obj(metadata_renderer, 'title')
        or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
        or info['id'])
    info['availability'] = self._extract_availability(data)
    info['channel_follower_count'] = self._get_count(data, ('header', ..., 'subscriberCountText'))
    info['description'] = try_get(metadata_renderer, lambda x: x.get('description', ''))
    info['tags'] = try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split())
    info['thumbnails'] = (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_text = (
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(self._parse_time_text(last_updated_text), '%Y%m%d')

    view_count = self._get_count(playlist_stats, 1)
    if view_count is None:  # 0 is allowed
        view_count = self._get_count(playlist_header_renderer, 'viewCountText')
    info['view_count'] = view_count

    playlist_count = self._get_count(playlist_stats, 0)
    if playlist_count is None:  # 0 is allowed
        playlist_count = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
    info['playlist_count'] = playlist_count

    if not info.get('uploader_id'):
        # No channel metadata: fall back to the playlist owner
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            secondary_sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer')
            owner = traverse_obj(secondary_sidebar, ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'uploader_id': browse_ep.get('browseId'),
            'uploader_url': urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))
        })

    # The channel fields mirror the uploader fields
    for suffix in ('', '_id', '_url'):
        info['channel' + suffix] = info['uploader' + suffix]
    return info

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline (watch page) playlist, one page at a time.

    Every page is parsed with self._playlist_entries. The following page is requested
    from the `next` endpoint, anchored on the last video of the current page.
    Iteration stops when a page has no entries, or has nothing after the last
    entry that was already yielded.

    @param playlist: playlist renderer of the first page
    @param playlist_id: playlist ID; sent in the API query and used in progress messages
    @param data: initial response, used for the account sync ID and visitor data
    @param ytcfg: ytcfg used for the API headers and requests
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last entry yielded so far.
        # If that entry is not on this page, next() gives -1 and we start at 0.
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # try_get returns None if the last item has no watch endpoint;
        # default to {} so that the fallbacks in the query below are used
        # instead of raising AttributeError on None.get()
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint']) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

4784

4785

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist embedded in a watch page.

    If the playlist links to a page of its own (different from `url`) and its ID is
    not of a kind known to be unviewable, a url_result pointing at that page is
    returned. Otherwise the entries are extracted inline.

    @param item_id: fallback playlist ID when the renderer has no 'playlistId'
    @param url: URL being extracted; base for resolving the playlist page URL
    @param data: response, used as a fallback source for the title
    @param playlist: playlist renderer
    @param ytcfg: ytcfg passed on to the inline extraction
    """
    playlist_title = playlist.get('title')
    if not playlist_title:
        playlist_title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    list_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    page_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    page_url = urljoin(url, page_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', list_id)

    delegate = page_url and page_url != url and not known_unviewable
    if not delegate:
        inline_entries = self._extract_inline_playlist(playlist, list_id, data, ytcfg)
        return self.playlist_result(
            inline_entries, playlist_id=list_id, playlist_title=playlist_title)

    return self.url_result(
        page_url, ie=YoutubeTabIE.ie_key(), video_id=list_id, video_title=playlist_title)

4807

4808

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public

    Signals are consulted in this order: availability badges of the primary sidebar
    renderer, the 'privacy' field of the playlist header, the selected entry of the
    privacy dropdown, and (for unlisted only) the microformat 'unlisted' flag.

    @param data: response
    @returns 'public', or whatever self._availability returns for the collected flags
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(sidebar_renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def _privacy_is(header_value, icon_value, fallback=None):
        # The header field takes priority over the dropdown icon;
        # `fallback` is used only when neither of them is present
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return fallback

    # A badge is an explicit signal and must count even if the header has no 'privacy'
    # field. Previously `badge or a if c else b` was parsed as `(badge or a) if c else b`,
    # which discarded the badge whenever the header privacy was missing.
    return self._availability(
        is_private=(
            self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
            or _privacy_is('PRIVATE', 'PRIVACY_PRIVATE')),
        is_unlisted=(
            self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
            or _privacy_is('UNLISTED', 'PRIVACY_UNLISTED', fallback=microformats_is_unlisted)),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Find a renderer among the playlist sidebar items.

    @param data: response
    @param info_renderer: key of the wanted renderer inside a sidebar item
    @param expected_type: type the renderer must have to be accepted
    @returns the first non-empty matching renderer, or None if there is none
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    # Lazily evaluated: stops at the first truthy renderer
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Nothing is requested (and None is returned) unless `data` has playlist metadata
    or a playlist header. The request itself is non-fatal.

    @param item_id: playlist ID; browsed as 'VL' + item_id
    @param data: response of the playlist
    @param ytcfg: ytcfg used for the API headers and request
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return None

    api_headers = self.generate_api_headers(
        ytcfg=ytcfg,
        account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    browse_query = {'params': 'wgYCCAA=', 'browseId': f'VL{item_id}'}
    return self._extract_response(
        item_id=item_id, headers=api_headers, query=browse_query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')

4877

4878

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is among the values of the 'skip' extractor argument of YoutubeTabIE"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps

4881

4882

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying where it makes sense.

    A retry is requested for network errors (except HTTP 403 and 429) and when
    the initial data is incomplete. Any other extraction error, as well as an
    alert found in the data, is reported through self._error_or_warning and ends the attempts.

    @param url: URL of the page
    @param item_id: ID used in progress messages
    @param fatal: passed to the retry manager, the data extraction and the error reporting
    @returns (webpage, data); either can be None if the download did not succeed
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            # Network errors are worth another attempt, unless the server answered 403/429
            if isinstance(cause, network_exceptions) and not (
                    isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429)):
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        usable = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not usable:
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when `ytcfg` is non-empty or the session is not authenticated.
    Otherwise raises ExtractorError if `fatal` is set and 'authcheck' is not in the
    'skip' extractor argument of YoutubeTabIE; in the remaining cases only warns.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_skipped = 'authcheck' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if fatal and not authcheck_skipped:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

4921

4922

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data of a tab/playlist, from the webpage if possible, else from the API.

    Unless the webpage is skipped, it is downloaded first and also supplies `ytcfg`
    when none was given. If no data was obtained that way, the data is requested
    through self._extract_tab_endpoint.

    @param url: URL of the tab/playlist
    @param item_id: ID used in progress messages
    @param ytcfg: already known ytcfg, if any
    @param fatal: whether a home page redirect and API failures raise
    @param webpage_fatal: whether a failed webpage download raises
    @param default_client: client used for the API fallback
    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)

    return data, ytcfg

4940

4941

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the API and fetch the data of the endpoint it points to.

    The URL is first resolved with 'navigation/resolve_url'. A browse endpoint in the
    answer is requested from 'browse', a watch endpoint from 'next' (checked in that order).

    @param url: URL to resolve
    @param item_id: ID used in progress messages
    @param ytcfg: ytcfg used for the API headers and requests
    @param fatal: raise ExtractorError instead of warning if the URL cannot be resolved
    @param default_client: client used for the API requests
    @returns the endpoint response; None if nothing was resolved and `fatal` is false
    """
    api_headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=api_headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_path in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_path, headers=api_headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

4958

4959

# Default 'params' value of a search request, used by _search_results when the caller
# passes none. A falsy value (like None) means no 'params' key is sent.
_SEARCH_PARAMS = None

4960

4961

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, requesting result pages until no continuation is left.

    @param query: the search query
    @param params: value of the 'params' request field; defaults to self._SEARCH_PARAMS.
                   A falsy value leaves the field out
    @param default_client: client used for the ytcfg download and the API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request_body = {'query': query}
    if params:
        request_body['params'] = params

    # Paths at which the result items can be found in a response
    content_paths = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    # A response must contain at least the first key of one of the paths
    required_keys = tuple({path[0] for path in content_paths})

    display_id = f'query "{query}"'
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-slot holder handed to _extract_entries. It is never assigned here, so the
    # loop only goes on if _extract_entries puts the next continuation into slot 0
    continuation_list = [None]
    response = None
    for page_num in itertools.count(1):
        request_body.update(continuation_list[0] or {})
        api_headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response), default_client=default_client)
        response = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=request_body,
            default_client=default_client, check_get_keys=required_keys, ytcfg=ytcfg, headers=api_headers)
        page_contents = traverse_obj(response, *content_paths)
        yield from self._extract_entries({'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4996

IE_DESC = 'YouTube Tabs'

4997

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

5006

(?P<not_channel>

5007

feed/|hashtag/|

5008

(?:playlist|watch)\?.*?\blist=

5009

)|

5010

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

5015

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

5016

}

5017

IE_NAME = 'youtube:tab'

5018

5019

_TESTS = [{

5020

'note': 'playlists, multipage',

5021

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

5022

'playlist_mincount': 94,

5023

'info_dict': {

5024

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5025

'title': 'Igor Kleiner - Playlists',

5026

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

5027

'uploader': 'Igor Kleiner',

5028

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5029

'channel': 'Igor Kleiner',

5030

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5031

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

5032

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5033

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5034

'channel_follower_count': int

5035

},

5036

}, {

5037

'note': 'playlists, multipage, different order',

5038

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

5039

'playlist_mincount': 94,

5040

'info_dict': {

5041

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

5042

'title': 'Igor Kleiner - Playlists',

5043

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

5044

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5045

'uploader': 'Igor Kleiner',

5046

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5047

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

5048

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

5049

'channel': 'Igor Kleiner',

5050

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

5051

'channel_follower_count': int

5052

},

5053

}, {

5054

'note': 'playlists, series',

5055

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

5056

'playlist_mincount': 5,

5057

'info_dict': {

5058

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5059

'title': '3Blue1Brown - Playlists',

5060

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5061

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5062

'uploader': '3Blue1Brown',

5063

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5064

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5065

'channel': '3Blue1Brown',

5066

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5067

'tags': ['Mathematics'],

5068

'channel_follower_count': int

5069

},

5070

}, {

5071

'note': 'playlists, singlepage',

5072

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

5073

'playlist_mincount': 4,

5074

'info_dict': {

5075

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5076

'title': 'ThirstForScience - Playlists',

5077

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

5078

'uploader': 'ThirstForScience',

5079

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5080

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5081

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

5082

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

5083

'tags': 'count:13',

5084

'channel': 'ThirstForScience',

5085

'channel_follower_count': int

5086

}

5087

}, {

5088

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

5089

'only_matching': True,

5090

}, {

5091

'note': 'basic, single video playlist',

5092

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5093

'info_dict': {

5094

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5095

'uploader': 'Sergey M.',

5096

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5097

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

5102

'channel': 'Sergey M.',

5103

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5104

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5105

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5106

'availability': 'public',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

5111

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5112

'info_dict': {

5113

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5114

'uploader': 'Sergey M.',

5115

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

5116

'title': 'youtube-dl empty playlist',

5117

'tags': [],

5118

'channel': 'Sergey M.',

5119

'description': '',

5120

'modified_date': '20160902',

5121

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

5122

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5123

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5124

'availability': 'public',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

5130

'info_dict': {

5131

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5132

'title': 'lex will - Home',

5133

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5134

'uploader': 'lex will',

5135

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5136

'channel': 'lex will',

5137

'tags': ['bible', 'history', 'prophesy'],

5138

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5139

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5140

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5141

'channel_follower_count': int

5142

},

5143

'playlist_mincount': 2,

5144

}, {

5145

'note': 'Videos tab',

5146

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

5147

'info_dict': {

5148

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5149

'title': 'lex will - Videos',

5150

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5151

'uploader': 'lex will',

5152

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5153

'tags': ['bible', 'history', 'prophesy'],

5154

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5155

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5156

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5157

'channel': 'lex will',

5158

'channel_follower_count': int

5159

},

5160

'playlist_mincount': 975,

5161

}, {

5162

'note': 'Videos tab, sorted by popular',

5163

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

5164

'info_dict': {

5165

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5166

'title': 'lex will - Videos',

5167

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5168

'uploader': 'lex will',

5169

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5170

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5171

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5172

'channel': 'lex will',

5173

'tags': ['bible', 'history', 'prophesy'],

5174

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5175

'channel_follower_count': int

5176

},

5177

'playlist_mincount': 199,

5178

}, {

5179

'note': 'Playlists tab',

5180

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

5181

'info_dict': {

5182

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5183

'title': 'lex will - Playlists',

5184

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5185

'uploader': 'lex will',

5186

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5187

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5188

'channel': 'lex will',

5189

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5190

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5191

'tags': ['bible', 'history', 'prophesy'],

5192

'channel_follower_count': int

5193

},

5194

'playlist_mincount': 17,

5195

}, {

5196

'note': 'Community tab',

5197

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

5198

'info_dict': {

5199

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5200

'title': 'lex will - Community',

5201

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5202

'uploader': 'lex will',

5203

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5204

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5205

'channel': 'lex will',

5206

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5207

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5208

'tags': ['bible', 'history', 'prophesy'],

5209

'channel_follower_count': int

5210

},

5211

'playlist_mincount': 18,

5212

}, {

5213

'note': 'Channels tab',

5214

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

5215

'info_dict': {

5216

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5217

'title': 'lex will - Channels',

5218

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

5219

'uploader': 'lex will',

5220

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5221

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5222

'channel': 'lex will',

5223

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

5224

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

5225

'tags': ['bible', 'history', 'prophesy'],

5226

'channel_follower_count': int

5227

},

5228

'playlist_mincount': 12,

5229

}, {

5230

'note': 'Search tab',

5231

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

5232

'playlist_mincount': 40,

5233

'info_dict': {

5234

'id': 'UCYO_jab_esuFRV4b17AJtAw',

5235

'title': '3Blue1Brown - Search - linear algebra',

5236

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

5237

'uploader': '3Blue1Brown',

5238

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

5239

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5240

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

5241

'tags': ['Mathematics'],

5242

'channel': '3Blue1Brown',

5243

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

5244

'channel_follower_count': int

5245

},

5246

}, {

5247

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5248

'only_matching': True,

5249

}, {

5250

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5251

'only_matching': True,

5252

}, {

5253

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

5254

'only_matching': True,

5255

}, {

5256

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

5257

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5258

'info_dict': {

5259

'title': '29C3: Not my department',

5260

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

5261

'uploader': 'Christiaan008',

5262

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5263

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

5264

'tags': [],

5265

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5266

'view_count': int,

5267

'modified_date': '20150605',

5268

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

5269

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

5270

'channel': 'Christiaan008',

5271

'availability': 'public',

5272

},

5273

'playlist_count': 96,

5274

}, {

5275

'note': 'Large playlist',

5276

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

5277

'info_dict': {

5278

'title': 'Uploads from Cauchemar',

5279

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

5280

'uploader': 'Cauchemar',

5281

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5282

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

5283

'tags': [],

5284

'modified_date': r're:\d{8}',

5285

'channel': 'Cauchemar',

5286

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

5287

'view_count': int,

5288

'description': '',

5289

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

5290

'availability': 'public',

5291

},

5292

'playlist_mincount': 1123,

5293

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5294

}, {

5295

'note': 'even larger playlist, 8832 videos',

5296

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

5297

'only_matching': True,

5298

}, {

5299

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

5300

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

5301

'info_dict': {

5302

'title': 'Uploads from Interstellar Movie',

5303

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

5304

'uploader': 'Interstellar Movie',

5305

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5306

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

5307

'tags': [],

5308

'view_count': int,

5309

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

5310

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

5311

'channel': 'Interstellar Movie',

5312

'description': '',

5313

'modified_date': r're:\d{8}',

5314

'availability': 'public',

5315

},

5316

'playlist_mincount': 21,

5317

}, {

5318

'note': 'Playlist with "show unavailable videos" button',

5319

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

5320

'info_dict': {

5321

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

5322

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

5323

'uploader': 'Phim Siêu Nhân Nhật Bản',

5324

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5325

'view_count': int,

5326

'channel': 'Phim Siêu Nhân Nhật Bản',

5327

'tags': [],

5328

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5329

'description': '',

5330

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5331

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5332

'modified_date': r're:\d{8}',

5333

'availability': 'public',

5334

},

5335

'playlist_mincount': 200,

5336

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5337

}, {

5338

'note': 'Playlist with unavailable videos in page 7',

5339

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

5340

'info_dict': {

5341

'title': 'Uploads from BlankTV',

5342

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

5343

'uploader': 'BlankTV',

5344

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5345

'channel': 'BlankTV',

5346

'channel_url': 'https://www.youtube.com/c/blanktv',

5347

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5348

'view_count': int,

5349

'tags': [],

5350

'uploader_url': 'https://www.youtube.com/c/blanktv',

5351

'modified_date': r're:\d{8}',

5352

'description': '',

5353

'availability': 'public',

5354

},

5355

'playlist_mincount': 1000,

5356

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5357

}, {

5358

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

5359

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5360

'info_dict': {

5361

'title': 'Data Analysis with Dr Mike Pound',

5362

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5363

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5364

'uploader': 'Computerphile',

5365

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

5366

'uploader_url': 'https://www.youtube.com/user/Computerphile',

5367

'tags': [],

5368

'view_count': int,

5369

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5370

'channel_url': 'https://www.youtube.com/user/Computerphile',

5371

'channel': 'Computerphile',

5372

'availability': 'public',

5373

'modified_date': '20190712',

5374

},

5375

'playlist_mincount': 11,

5376

}, {

5377

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5378

'only_matching': True,

5379

}, {

5380

'note': 'Playlist URL that does not actually serve a playlist',

5381

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

5386

'uploader': 'STREEM',

5387

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

5388

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

5389

'upload_date': '20150526',

5390

'license': 'Standard YouTube License',

5391

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

5392

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5399

},

5400

'skip': 'This video is not available.',

5401

'add_ie': [YoutubeIE.ie_key()],

5402

}, {

5403

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5404

'only_matching': True,

5405

}, {

5406

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5407

'only_matching': True,

5408

}, {

5409

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5410

'info_dict': {

5411

'id': 'Wq15eF5vCbI', # This will keep changing

5412

'ext': 'mp4',

5413

'title': str,

5414

'uploader': 'Sky News',

5415

'uploader_id': 'skynews',

5416

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5417

'upload_date': r're:\d{8}',

5418

'description': str,

5419

'categories': ['News & Politics'],

5420

'tags': list,

5421

'like_count': int,

5422

'release_timestamp': int,

5423

'channel': 'Sky News',

5424

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5425

'age_limit': 0,

5426

'view_count': int,

5427

'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',

5428

'playable_in_embed': True,

5429

'release_date': r're:\d+',

5430

'availability': 'public',

5431

'live_status': 'is_live',

5432

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5433

'channel_follower_count': int,

5434

'concurrent_view_count': int,

5435

},

5436

'params': {

5437

'skip_download': True,

5438

},

5439

'expected_warnings': ['Ignoring subtitle tracks found in '],

5440

}, {

5441

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5446

'uploader': 'The Young Turks',

5447

'uploader_id': 'TheYoungTurks',

5448

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5449

'upload_date': '20150715',

5450

'license': 'Standard YouTube License',

5451

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5452

'categories': ['News & Politics'],

5453

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5458

},

5459

'only_matching': True,

5460

}, {

5461

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5462

'only_matching': True,

5463

}, {

5464

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5465

'only_matching': True,

5466

}, {

5467

'note': 'A channel that is not live. Should raise error',

5468

'url': 'https://www.youtube.com/user/numberphile/live',

5469

'only_matching': True,

5470

}, {

5471

'url': 'https://www.youtube.com/feed/trending',

5472

'only_matching': True,

5473

}, {

5474

'url': 'https://www.youtube.com/feed/library',

5475

'only_matching': True,

5476

}, {

5477

'url': 'https://www.youtube.com/feed/history',

5478

'only_matching': True,

5479

}, {

5480

'url': 'https://www.youtube.com/feed/subscriptions',

5481

'only_matching': True,

5482

}, {

5483

'url': 'https://www.youtube.com/feed/watch_later',

5484

'only_matching': True,

5485

}, {

5486

'note': 'Recommended - redirects to home page.',

5487

'url': 'https://www.youtube.com/feed/recommended',

5488

'only_matching': True,

5489

}, {

5490

'note': 'inline playlist with not always working continuations',

5491

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5492

'only_matching': True,

5493

}, {

5494

'url': 'https://www.youtube.com/course',

5495

'only_matching': True,

5496

}, {

5497

'url': 'https://www.youtube.com/zsecurity',

5498

'only_matching': True,

5499

}, {

5500

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5501

'only_matching': True,

5502

}, {

5503

'url': 'https://www.youtube.com/TheYoungTurks/live',

5504

'only_matching': True,

5505

}, {

5506

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 300, # not consistent but should be over 300

5513

}, {

5514

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5515

'only_matching': True,

5516

}, {

5517

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5518

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5519

'only_matching': True

5520

}, {

5521

'note': '/browse/ should redirect to /channel/',

5522

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5523

'only_matching': True

5524

}, {

5525

'note': 'VLPL, should redirect to playlist?list=PL...',

5526

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5527

'info_dict': {

5528

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5529

'uploader': 'NoCopyrightSounds',

5530

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5531

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5532

'title': 'NCS : All Releases 💿',

5533

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5534

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5535

'modified_date': r're:\d{8}',

5536

'view_count': int,

5537

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5538

'tags': [],

5539

'channel': 'NoCopyrightSounds',

5540

'availability': 'public',

5541

},

5542

'playlist_mincount': 166,

5543

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5544

}, {

5545

'note': 'Topic, should redirect to playlist?list=UU...',

5546

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5547

'info_dict': {

5548

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5549

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5550

'title': 'Uploads from Royalty Free Music - Topic',

5551

'uploader': 'Royalty Free Music - Topic',

5552

'tags': [],

5553

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5554

'channel': 'Royalty Free Music - Topic',

5555

'view_count': int,

5556

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5557

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5558

'modified_date': r're:\d{8}',

5559

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5560

'description': '',

5561

'availability': 'public',

5562

},

5563

'playlist_mincount': 101,

5564

}, {

5565

# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)

5566

# Treat as a general feed

5567

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5568

'info_dict': {

5569

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5570

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5571

'tags': [],

5572

},

5573

'playlist_mincount': 9,

5574

}, {

5575

'note': 'Youtube music Album',

5576

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5577

'info_dict': {

5578

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5579

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5584

'modified_date': r're:\d{8}',

5585

},

5586

'playlist_count': 50,

5587

}, {

5588

'note': 'unlisted single video playlist',

5589

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5590

'info_dict': {

5591

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5592

'uploader': 'colethedj',

5593

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5594

'title': 'yt-dlp unlisted playlist test',

5595

'availability': 'unlisted',

5596

'tags': [],

5597

'modified_date': '20220418',

5598

'channel': 'colethedj',

5599

'view_count': int,

5600

'description': '',

5601

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5602

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5603

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5608

'url': 'https://www.youtube.com/feed/recommended',

5609

'info_dict': {

5610

'id': 'recommended',

5611

'title': 'recommended',

5612

'tags': [],

5613

},

5614

'playlist_mincount': 50,

5615

'params': {

5616

'skip_download': True,

5617

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5618

},

5619

}, {

5620

'note': 'API Fallback: /videos tab, sorted by oldest first',

5621

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5622

'info_dict': {

5623

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5624

'title': 'Cody\'sLab - Videos',

5625

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5626

'uploader': 'Cody\'sLab',

5627

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5628

'channel': 'Cody\'sLab',

5629

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5630

'tags': [],

5631

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5632

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5633

'channel_follower_count': int

5634

},

5635

'playlist_mincount': 650,

5636

'params': {

5637

'skip_download': True,

5638

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5639

},

5640

'skip': 'Query for sorting no longer works',

5641

}, {

5642

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5643

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5644

'info_dict': {

5645

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5646

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5647

'title': 'Uploads from Royalty Free Music - Topic',

5648

'uploader': 'Royalty Free Music - Topic',

5649

'modified_date': r're:\d{8}',

5650

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5651

'description': '',

5652

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5653

'tags': [],

5654

'channel': 'Royalty Free Music - Topic',

5655

'view_count': int,

5656

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5657

'availability': 'public',

5658

},

5659

'playlist_mincount': 101,

5660

'params': {

5661

'skip_download': True,

5662

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5663

},

5664

}, {

5665

'note': 'non-standard redirect to regional channel',

5666

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5667

'only_matching': True

5668

}, {

5669

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5670

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5671

'info_dict': {

5672

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5673

'modified_date': '20220407',

5674

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5675

'tags': [],

5676

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5677

'uploader': 'pukkandan',

5678

'availability': 'unlisted',

5679

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5680

'channel': 'pukkandan',

5681

'description': 'Test for collaborative playlist',

5682

'title': 'yt-dlp test - collaborative playlist',

5683

'view_count': int,

5684

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5685

},

5686

'playlist_mincount': 2

5687

}, {

5688

'note': 'translated tab name',

5689

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',

5690

'info_dict': {

5691

'id': 'UCiu-3thuViMebBjw_5nWYrA',

5692

'tags': [],

5693

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5694

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5695

'description': 'test description',

5696

'title': 'cole-dlp-test-acc - 再生リスト',

5697

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5698

'uploader': 'cole-dlp-test-acc',

5699

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5700

'channel': 'cole-dlp-test-acc',

5701

'channel_follower_count': int,

5702

},

5703

'playlist_mincount': 1,

5704

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5705

'expected_warnings': ['Preferring "ja"'],

5706

}, {

5707

# XXX: this should really check flat playlist entries, but the test suite doesn't support that

5708

'note': 'preferred lang set with playlist with translated video titles',

5709

'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5710

'info_dict': {

5711

'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',

5712

'tags': [],

5713

'view_count': int,

5714

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5715

'uploader': 'cole-dlp-test-acc',

5716

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5717

'channel': 'cole-dlp-test-acc',

5718

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5719

'description': 'test',

5720

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5721

'title': 'dlp test playlist',

5722

'availability': 'public',

5723

},

5724

'playlist_mincount': 1,

5725

'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},

5726

'expected_warnings': ['Preferring "ja"'],

5727

}, {

5728

# shorts audio pivot for 2GtVksBMYFM.

5729

'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',

5730

'info_dict': {

5731

'id': 'sfv_audio_pivot',

5732

'title': 'sfv_audio_pivot',

5733

'tags': [],

5734

},

5735

'playlist_mincount': 50,

5736

5737

}, {

5738

# Channel with a real live tab (not to be mistaken with streams tab)

5739

# Do not treat like it should redirect to live stream

5740

'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',

5741

'info_dict': {

5742

'id': 'UCEH7P7kyJIkS_gJf93VYbmg',

5743

'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',

5744

'tags': [],

5745

},

5746

'playlist_mincount': 20,

5747

}, {

5748

# Tab name is not the same as tab id

5749

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',

5750

'info_dict': {

5751

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

5752

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',

5753

'tags': [],

5754

},

5755

'playlist_mincount': 8,

5756

}, {

5757

# Home tab id is literally home. Not to get mistaken with featured

5758

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',

5759

'info_dict': {

5760

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

5761

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',

5762

'tags': [],

5763

},

5764

'playlist_mincount': 8,

5765

}, {

5766

# Should get three playlists for videos, shorts and streams tabs

5767

'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5768

'info_dict': {

5769

'id': 'UCK9V2B22uJYu3N7eR_BT9QA',

5770

'title': 'Polka Ch. 尾丸ポルカ',

5771

'channel_follower_count': int,

5772

'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',

5773

'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5774

'uploader': 'Polka Ch. 尾丸ポルカ',

5775

'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9',

5776

'channel': 'Polka Ch. 尾丸ポルカ',

5777

'tags': 'count:35',

5778

'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',

5779

'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA',

},

'playlist_count': 3,

}, {

# Shorts tab with channel with handle

5784

'url': 'https://www.youtube.com/@NotJustBikes/shorts',

5785

'info_dict': {

5786

'id': 'UC0intLFzLaudFG-xAvUEO-A',

5787

'title': 'Not Just Bikes - Shorts',

5788

'tags': 'count:12',

5789

'uploader': 'Not Just Bikes',

5790

'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',

5791

'description': 'md5:7513148b1f02b924783157d84c4ea555',

5792

'channel_follower_count': int,

5793

'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',

5794

'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',

5795

'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',

5796

'channel': 'Not Just Bikes',

5797

},

5798

'playlist_mincount': 10,

5799

}, {

5800

# Streams tab

5801

'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',

5802

'info_dict': {

5803

'id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5804

'title': '中村悠一 - Live',

5805

'tags': 'count:7',

5806

'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5807

'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',

5808

'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',

5809

'channel': '中村悠一',

5810

'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',

5811

'channel_follower_count': int,

5812

'uploader': '中村悠一',

5813

'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',

5814

},

5815

'playlist_mincount': 60,

5816

}, {

5817

# Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.

5818

# See test_youtube_lists

5819

'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',

5820

'only_matching': True,

5821

}, {

5822

# No uploads and no UCID given. Should fail with no uploads error

5823

# See test_youtube_lists

5824

'url': 'https://www.youtube.com/news',

5825

'only_matching': True

5826

}, {

5827

# No videos tab but has a shorts tab

5828

'url': 'https://www.youtube.com/c/TKFShorts',

5829

'info_dict': {

5830

'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

5831

'title': 'Shorts Break - Shorts',

5832

'tags': 'count:32',

5833

'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

5834

'channel': 'Shorts Break',

5835

'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',

5836

'uploader': 'Shorts Break',

5837

'channel_follower_count': int,

5838

'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',

5839

'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',

5840

'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',

5841

},

5842

'playlist_mincount': 30,

5843

}, {

5844

# Trending Now Tab. tab id is empty

5845

'url': 'https://www.youtube.com/feed/trending',

5846

'info_dict': {

5847

'id': 'trending',

5848

'title': 'trending - Now',

5849

'tags': [],

5850

},

5851

'playlist_mincount': 30,

5852

}, {

5853

# Trending Gaming Tab. tab id is empty

5854

'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',

5855

'info_dict': {

5856

'id': 'trending',

5857

'title': 'trending - Gaming',

5858

'tags': [],

5859

},

5860

'playlist_mincount': 30,

5861

}, {

5862

# Shorts url result in shorts tab

5863

'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',

5864

'info_dict': {

5865

'id': 'UCiu-3thuViMebBjw_5nWYrA',

5866

'title': 'cole-dlp-test-acc - Shorts',

5867

'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',

5868

'channel': 'cole-dlp-test-acc',

5869

'channel_follower_count': int,

5870

'description': 'test description',

5871

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5872

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

5873

'tags': [],

5874

'uploader': 'cole-dlp-test-acc',

5875

'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'Youtube',

'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',

5883

'id': 'sSM9J5YH_60',

5884

'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',

5885

'title': 'SHORT short',

5886

'channel': 'cole-dlp-test-acc',

5887

'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',

'view_count': int,

'thumbnails': list,

}

}],

'params': {'extract_flat': True},

5893

}, {

5894

# Live video status should be extracted

5895

'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',

5896

'info_dict': {

5897

'id': 'UCQvWX73GQygcwXOTSf_VDVg',

5898

'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live

'tags': []

},

'playlist': [{

'info_dict': {

'_type': 'url',

'ie_key': 'Youtube',

'url': 'startswith:https://www.youtube.com/watch?v=',

5906

'id': str,

5907

'title': str,

5908

'live_status': 'is_live',

5909

'channel_id': str,

5910

'channel_url': str,

5911

'concurrent_view_count': int,

'channel': str,

}

}],

'params': {'extract_flat': True},

5916

'playlist_mincount': 1

}]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs that YoutubeIE already accepts are always rejected here; every
    other URL is judged by the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5922

5923

# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - one optional "/segment" path component (no "?", "#" or "/" inside);
#          only attempted when the `not_channel` group did not take part in the match
#   post - everything that remains up to the end of the string
# NOTE(review): `not_channel` is not defined on this line; it must be a named group
# inside _VALID_URL (declared outside this view) for the conditional to compile.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')

5924

5925

def _get_url_mobj(self, url):
    """Match *url* against ``_URL_RE`` and return its named groups as a dict.

    Groups that did not participate in the match are reported as empty
    strings instead of None, so callers can slice/concatenate them freely.
    """
    named_groups = self._URL_RE.match(url).groupdict()
    return {
        name: '' if value is None else value
        for name, value in named_groups.items()
    }

5929

5930

def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a ``(tab_id, tab_name)`` pair for a tab renderer dict.

    ``tab_name`` is the lower-cased ``title`` of the tab ('' when absent).
    ``tab_id`` is taken, in order of preference, from the tab segment of the
    tab's endpoint URL (resolved against *base_url*), from the renderer's
    ``tabIdentifier`` string, or finally from the tab name itself.
    """
    tab_name = (tab.get('title') or '').lower()

    endpoint_path = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_path)

    # First choice: the "/<tab>" segment of the tab's own URL (leading slash dropped)
    tab_id = None
    if tab_url:
        tab_id = self._get_url_mobj(tab_url)['tab'][1:]
    # Second choice: the identifier carried by the renderer
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if tab_id:
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(tab_name, tab_name), tab_name

5951

5952

def _has_tab(self, tabs, tab_id):
    """Return True if any renderer in *tabs* resolves to the id *tab_id*."""
    for candidate in tabs:
        if self._extract_tab_id_and_name(candidate)[0] == tab_id:
            return True
    return False

5954

5955

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a tab-style page (channel tab, playlist, feed, ...) from *url*.

    Rough flow, as implemented below:
      1. Force the host to www.youtube.com and split the URL into
         pre / tab / post parts via ``_get_url_mobj``.
      2. For channel URLs flagged with ``is_music_url`` in *smuggled_data*,
         hand VL ids, MP (album) ids and /browse/ URLs over to their
         playlist/channel equivalents.
      3. A channel URL without a tab is pointed at ``/videos`` unless the
         'no-youtube-channel-redirect' compat option is set.
      4. ``watch`` URLs without a video id are either rejected or redirected
         to the playlist; if ``_yes_playlist`` declines, the single video is
         delegated to YoutubeIE.
      5. Page data is fetched, a conditional (regional) redirect is followed,
         and the selected tab is checked against the requested one.
      6. Results are built from tabs (plus any extra streams/shorts tabs),
         from an inline playlist, or as a single video as a last resort.

    Returns whatever ``url_result`` / ``playlist_result`` /
    ``_extract_from_tabs`` / ``_extract_from_playlist`` produce.

    Raises ExtractorError for unresolvable albums, watch URLs with neither
    video nor playlist id, channels without uploads or without the requested
    tab, and unrecognised pages; raises UserNotLive when a ``/live`` tab was
    requested but a different tab ended up selected.
    """
    item_id = self._match_id(url)
    # Normalise the host; only the netloc component is replaced
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    # A URL counts as a channel URL when the `not_channel` group is empty
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` carries its leading slash; the id is the same text without it
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        url = f'{pre}/videos{post}'

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    # Playlist extraction was declined: delegate the single video to YoutubeIE
    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the originally requested tab and trailing part on the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            # Queue the other upload tabs; they are extracted after the current page
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    raise ExtractorError('This channel has no uploads', expected=True)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    raise ExtractorError('This channel has no uploads', expected=True)
                else:
                    # From here on the page is treated as the uploads playlist
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                # (data is dropped, so only the queued extra tabs can yield entries)
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # Tabs are re-read because `data` may have been replaced (or cleared) above
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        metadata = self._extract_metadata_from_tabs(item_id, data)
        # Generic wording unless a UC channel id lets us spell out the UU playlist URL
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: a video id found in the page data, else the one from the query string
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')

6104

6105

6106

class YoutubePlaylistIE(InfoExtractor):
    """Accepts bare playlist ids and ``...?list=<id>`` URLs and forwards them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    # Verbose-mode pattern: an optional host/path prefix ending in "list=", then the playlist id
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/c/WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/c/WickmanVT',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by YoutubeTabIE or carrying a ``v`` query value; otherwise defer to the parent."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): imported inside the method although parse_qs is also a
        # module-level import; presumably this keeps the method self-contained - confirm before moving
        from ..utils import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        return False if video_ids[0] else super().suitable(url)

    def _real_extract(self, url):
        """Rebuild *url* as a www.youtube.com/playlist URL and delegate it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Reuse the original query string; a bare id has none, so synthesise `list=<id>`
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6218

6219

6220

class YoutubeYtBeIE(InfoExtractor):
    """Handles youtu.be short links that carry a ``list=`` parameter by forwarding them to YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short link into a full watch URL (video + playlist) and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

6269

6270

6271

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` embeds to the channel's ``/live`` URL."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the ``channel`` query value as the ID."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

6284

6285

6286

class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into the matching ``/user/<name>`` URL."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [
        {'url': 'ytuser:phihag', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Delegate the user page to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, ie=YoutubeTabIE, video_id=user_id)

6298

6299

6300

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` keyword (and its spelling variants) to the ``LL`` playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [
        {'url': ':ytfav', 'only_matching': True},
        {'url': ':ytfavorites', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Delegate the fixed ``LL`` playlist URL to YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

6317

6318

6319

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Extractor for the ``:ytnotif`` keyword; returns notification menu items as a playlist."""
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _TESTS = [
        {'url': ':ytnotif', 'only_matching': True},
        {'url': ':ytnotifications', 'only_matching': True},
    ]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield the entries of one notification menu response.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the ``continuationItemRenderer``
        found among the items, if any.
        """
        # The items live under one of two paths; traverse_obj tries them in order
        item_paths = (
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0,
             'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
        )
        menu_items = traverse_obj(response, *item_paths, expected_type=list) or []

        continuation_list[0] = None
        for menu_item in menu_items:
            result = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if result:
                yield result
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert a single ``notificationRenderer`` into a ``url`` result dict.

        Returns None when the notification points neither to a video nor to a
        post (i.e. no video ID, and no channel ID / post ID pair).
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_endpoint = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_endpoint, 'browseId', expected_type=str)
            base_url = traverse_obj(browse_endpoint, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(r'/post/(.+)', base_url, 'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        message = self._get_text(notification, 'shortMessage')
        if message:
            message = message.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', message, 'video title', default=None)

        # The timestamp is only computed when the "approximate_date" extractor arg is set
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from successive notification menu pages until no continuation is left."""
        continuation_slot = [None]
        response = None
        for page_num in itertools.count(1):
            ctoken = traverse_obj(
                continuation_slot,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous response (None for the first request)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page_num}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_slot)
            if not continuation_slot[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a playlist with ID and title ``notifications``."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)

6408

6409

6410

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [
        {
            'url': 'ytsearch5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            }
        },
    ]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key."""
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [
        {
            'url': 'ytsearchdate5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            }
        },
    ]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle www.youtube.com ``/results`` and ``/search`` URLs.

    The query comes from ``search_query`` (or ``q``) and the optional ``sp``
    value is forwarded to the search as its params.
    """
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            }
        },
        {
            'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'python',
                'title': 'python',
            }
        },
        {
            'url': 'https://www.youtube.com/results?search_query=%23cats',
            'playlist_mincount': 1,
            'info_dict': {
                'id': '#cats',
                'title': '#cats',
                # The test suite does not have support for nested playlists
                # 'entries': [{
                #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                #     'title': '#cats',
                # }],
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the search results as a playlist whose ID and title are the query."""
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

6479

6480

6481

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com ``/search`` URLs.

    A result section can be chosen either with an explicit ``sp`` query value
    or with a URL fragment naming one of the keys of ``_SECTIONS``.
    """
    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music',
            'playlist_count': 16,
            'info_dict': {
                'id': 'royalty free music',
                'title': 'royalty free music',
            }
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - songs',
                'title': 'royalty free music - songs',
            },
            'params': {'extract_flat': 'in_playlist'}
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - community playlists',
                'title': 'royalty free music - community playlists',
            },
            'params': {'extract_flat': 'in_playlist'}
        },
    ]

    # Section name (lower case) -> search params value for that section
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the search results as a playlist titled ``<query>`` or ``<query> - <section>``."""
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Explicit params: use the known section name, else the raw params value
            section = params
            for name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = name
                    break
        else:
            # No params: treat the text after the first "#" (if any) as a section name
            url_parts = url.split('#')
            fragment = url_parts[1] if len(url_parts) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

6532

6533

6534

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    Each subclass has to override _FEED_NAME; it determines both the
    extractor name (``youtube:<feed>``) and the ``/feed/<feed>`` URL.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        return 'youtube:{}'.format(cls._FEED_NAME)

    def _real_initialize(self):
        # Reuses the check implemented on YoutubeBaseInfoExtractor (not a base class here)
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate the feed page to YoutubeTabIE."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

6552

6553

6554

class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` keyword to the ``WL`` playlist."""
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Delegate the fixed ``WL`` playlist URL to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

6566

6567

6568

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``; matches the bare youtube.com URL and ``:ytrec``."""
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # overrides the True set on the feeds base class
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions``; matches the ``:ytsubs`` keyword family."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history``; matches ``:ythis`` / ``:ythistory``."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]

class YoutubeStoriesIE(InfoExtractor):
    """Expand ``ytstories:UC<id>`` into an ``RLTD<id>`` playlist URL for YoutubeTabIE."""
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Build the stories playlist URL, smuggle ``is_story`` into it, and delegate."""
        # The matched ID is the channel ID without its "UC" prefix
        playlist_id = f'RLTD{self._match_id(url)}'
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        story_url = smuggle_url(playlist_url, {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)

6622

6623

6624

class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Map ``/source/<video id>/shorts`` URLs to the ``sfv_audio_pivot`` feed."""
    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [
        {'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts', 'only_matching': True},
    ]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the sfv_audio_pivot browse params for the given video id.

        The encoded video id is inserted three times into a fixed byte
        template; the result is base64-encoded and then URL-quoted.
        """
        raw_id = video_id.encode()
        template = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b'
        payload = template % (raw_id, raw_id, raw_id)
        encoded = base64.b64encode(payload).decode()
        return urllib.parse.quote(encoded)

    def _real_extract(self, url):
        """Delegate the audio pivot feed URL for the matched video to YoutubeTabIE."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)

class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch YouTube URLs that appear to have lost their video ID.

    Matches watch URLs that end right after ``watch?`` or after one of a few
    non-video parameters, and ``attribution_link`` URLs that end after their
    ``a`` parameter. Extraction always fails with an expected error explaining
    that the URL probably was not quoted in the shell.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError telling the user how to quote the URL."""
        # The suggested commands name yt-dlp, the program this extractor ships with
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extractor for ``/clip/<id>`` URLs.

    Returns a ``url_transparent`` result pointing at the base video's watch
    URL, with the clip ID as ``id`` and the clip's start/end (in seconds) as
    ``section_start`` / ``section_end``.
    """
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Resolve a clip URL to its base video plus the clip's time range.

        Raises ExtractorError when either the base video ID or the clip's
        loop command (which carries the start/end times) cannot be found in
        the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        # Without this guard a missing loop command surfaces as an opaque TypeError below
        if not clip_data:
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Times are given in milliseconds; the info dict uses seconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value has only 1-10 ID characters and report them as truncated."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID and the URL."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)