jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import hashlib
	6	import itertools
	7	import json
	8	import math
	9	import os.path
	10	import random
	11	import re
	12	import sys
	13	import threading
	14	import time
	15	import traceback
	16	import urllib.error
	17	import urllib.parse
	18
	19	from .common import InfoExtractor, SearchInfoExtractor
	20	from .openload import PhantomJSwrapper
	21	from ..compat import functools
	22	from ..jsinterp import JSInterpreter
	23	from ..utils import (
	24	NO_DEFAULT,
	25	ExtractorError,
	26	UserNotLive,
	27	bug_reports_message,
	28	classproperty,
	29	clean_html,
	30	datetime_from_str,
	31	dict_get,
	32	float_or_none,
	33	format_field,
	34	get_first,
	35	int_or_none,
	36	is_html,
	37	join_nonempty,
	38	js_to_json,
	39	mimetype2ext,
	40	network_exceptions,
	41	orderedSet,
	42	parse_codecs,
	43	parse_count,
	44	parse_duration,
	45	parse_iso8601,
	46	parse_qs,
	47	qualities,
	48	remove_start,
	49	smuggle_url,
	50	str_or_none,
	51	str_to_int,
	52	strftime_or_none,
	53	traverse_obj,
	54	try_get,
	55	unescapeHTML,
	56	unified_strdate,
	57	unified_timestamp,
	58	unsmuggle_url,
	59	update_url_query,
	60	url_or_none,
	61	urljoin,
	62	variadic,
	63	)
	64
	65	# any clients starting with _ cannot be explicitly requested by the user
	66	INNERTUBE_CLIENTS = {
	67	'web': {
	68	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	69	'INNERTUBE_CONTEXT': {
	70	'client': {
	71	'clientName': 'WEB',
	72	'clientVersion': '2.20220801.00.00',
	73	}
	74	},
	75	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	76	},
	77	'web_embedded': {
	78	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	79	'INNERTUBE_CONTEXT': {
	80	'client': {
	81	'clientName': 'WEB_EMBEDDED_PLAYER',
	82	'clientVersion': '1.20220731.00.00',
	83	},
	84	},
	85	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	86	},
	87	'web_music': {
	88	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	89	'INNERTUBE_HOST': 'music.youtube.com',
	90	'INNERTUBE_CONTEXT': {
	91	'client': {
	92	'clientName': 'WEB_REMIX',
	93	'clientVersion': '1.20220727.01.00',
	94	}
	95	},
	96	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	97	},
	98	'web_creator': {
	99	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	100	'INNERTUBE_CONTEXT': {
	101	'client': {
	102	'clientName': 'WEB_CREATOR',
	103	'clientVersion': '1.20220726.00.00',
	104	}
	105	},
	106	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	107	},
	108	'android': {
	109	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	110	'INNERTUBE_CONTEXT': {
	111	'client': {
	112	'clientName': 'ANDROID',
	113	'clientVersion': '17.29.34',
	114	'androidSdkVersion': 30
	115	}
	116	},
	117	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	118	'REQUIRE_JS_PLAYER': False
	119	},
	120	'android_embedded': {
	121	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	122	'INNERTUBE_CONTEXT': {
	123	'client': {
	124	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	125	'clientVersion': '17.29.34',
	126	'androidSdkVersion': 30
	127	},
	128	},
	129	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	130	'REQUIRE_JS_PLAYER': False
	131	},
	132	'android_music': {
	133	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	134	'INNERTUBE_CONTEXT': {
	135	'client': {
	136	'clientName': 'ANDROID_MUSIC',
	137	'clientVersion': '5.16.51',
	138	'androidSdkVersion': 30
	139	}
	140	},
	141	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	142	'REQUIRE_JS_PLAYER': False
	143	},
	144	'android_creator': {
	145	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	146	'INNERTUBE_CONTEXT': {
	147	'client': {
	148	'clientName': 'ANDROID_CREATOR',
	149	'clientVersion': '22.28.100',
	150	'androidSdkVersion': 30
	151	},
	152	},
	153	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	154	'REQUIRE_JS_PLAYER': False
	155	},
	156	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	157	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	158	'ios': {
	159	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	160	'INNERTUBE_CONTEXT': {
	161	'client': {
	162	'clientName': 'IOS',
	163	'clientVersion': '17.30.1',
	164	'deviceModel': 'iPhone14,3',
	165	}
	166	},
	167	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	168	'REQUIRE_JS_PLAYER': False
	169	},
	170	'ios_embedded': {
	171	'INNERTUBE_CONTEXT': {
	172	'client': {
	173	'clientName': 'IOS_MESSAGES_EXTENSION',
	174	'clientVersion': '17.30.1',
	175	'deviceModel': 'iPhone14,3',
	176	},
	177	},
	178	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	179	'REQUIRE_JS_PLAYER': False
	180	},
	181	'ios_music': {
	182	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	183	'INNERTUBE_CONTEXT': {
	184	'client': {
	185	'clientName': 'IOS_MUSIC',
	186	'clientVersion': '5.18',
	187	},
	188	},
	189	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	190	'REQUIRE_JS_PLAYER': False
	191	},
	192	'ios_creator': {
	193	'INNERTUBE_CONTEXT': {
	194	'client': {
	195	'clientName': 'IOS_CREATOR',
	196	'clientVersion': '22.29.101',
	197	},
	198	},
	199	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	200	'REQUIRE_JS_PLAYER': False
	201	},
	202	# mweb has 'ultralow' formats
	203	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	204	'mweb': {
	205	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	206	'INNERTUBE_CONTEXT': {
	207	'client': {
	208	'clientName': 'MWEB',
	209	'clientVersion': '2.20220801.00.00',
	210	}
	211	},
	212	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	213	},
	214	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	215	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	216	'tv_embedded': {
	217	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	218	'INNERTUBE_CONTEXT': {
	219	'client': {
	220	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	221	'clientVersion': '2.0',
	222	},
	223	},
	224	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	225	},
	226	}
	227
	228
	229	def _split_innertube_client(client_name):
	230	variant, *base = client_name.rsplit('.', 1)
	231	if base:
	232	return variant, base[0], variant
	233	base, *variant = client_name.split('_', 1)
	234	return client_name, base, variant[0] if variant else None
	235
	236
	237	def build_innertube_clients():
	238	THIRD_PARTY = {
	239	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	240	}
	241	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	242	priority = qualities(BASE_CLIENTS[::-1])
	243
	244	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	245	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	246	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	247	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	248	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	249
	250	_, base_client, variant = _split_innertube_client(client)
	251	ytcfg['priority'] = 10 * priority(base_client)
	252
	253	if not variant:
	254	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	255	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	256	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	257	embedscreen['priority'] -= 3
	258	elif variant == 'embedded':
	259	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	260	ytcfg['priority'] -= 2
	261	else:
	262	ytcfg['priority'] -= 3
	263
	264
	265	build_innertube_clients()
	266
	267
	268	class YoutubeBaseInfoExtractor(InfoExtractor):
	269	"""Provide base functions for Youtube extractors"""
	270
	271	_RESERVED_NAMES = (
	272	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	273	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	274	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	275	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	276
	277	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	278
	279	# _NETRC_MACHINE = 'youtube'
	280
	281	# If True it will raise an error if no login info is provided
	282	_LOGIN_REQUIRED = False
	283
	284	_INVIDIOUS_SITES = (
	285	# invidious-redirect websites
	286	r'(?:www\.)?redirect\.invidious\.io',
	287	r'(?:(?:www\|dev)\.)?invidio\.us',
	288	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	289	r'(?:www\.)?invidious\.pussthecat\.org',
	290	r'(?:www\.)?invidious\.zee\.li',
	291	r'(?:www\.)?invidious\.ethibox\.fr',
	292	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	293	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	294	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	295	# youtube-dl invidious instances list
	296	r'(?:(?:www\|no)\.)?invidiou\.sh',
	297	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	298	r'(?:www\.)?invidious\.kabi\.tk',
	299	r'(?:www\.)?invidious\.mastodon\.host',
	300	r'(?:www\.)?invidious\.zapashcanon\.fr',
	301	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	302	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	303	r'(?:www\.)?invidious\.himiko\.cloud',
	304	r'(?:www\.)?invidious\.reallyancient\.tech',
	305	r'(?:www\.)?invidious\.tube',
	306	r'(?:www\.)?invidiou\.site',
	307	r'(?:www\.)?invidious\.site',
	308	r'(?:www\.)?invidious\.xyz',
	309	r'(?:www\.)?invidious\.nixnet\.xyz',
	310	r'(?:www\.)?invidious\.048596\.xyz',
	311	r'(?:www\.)?invidious\.drycat\.fr',
	312	r'(?:www\.)?inv\.skyn3t\.in',
	313	r'(?:www\.)?tube\.poal\.co',
	314	r'(?:www\.)?tube\.connect\.cafe',
	315	r'(?:www\.)?vid\.wxzm\.sx',
	316	r'(?:www\.)?vid\.mint\.lgbt',
	317	r'(?:www\.)?vid\.puffyan\.us',
	318	r'(?:www\.)?yewtu\.be',
	319	r'(?:www\.)?yt\.elukerio\.org',
	320	r'(?:www\.)?yt\.lelux\.fi',
	321	r'(?:www\.)?invidious\.ggc-project\.de',
	322	r'(?:www\.)?yt\.maisputain\.ovh',
	323	r'(?:www\.)?ytprivate\.com',
	324	r'(?:www\.)?invidious\.13ad\.de',
	325	r'(?:www\.)?invidious\.toot\.koeln',
	326	r'(?:www\.)?invidious\.fdn\.fr',
	327	r'(?:www\.)?watch\.nettohikari\.com',
	328	r'(?:www\.)?invidious\.namazso\.eu',
	329	r'(?:www\.)?invidious\.silkky\.cloud',
	330	r'(?:www\.)?invidious\.exonip\.de',
	331	r'(?:www\.)?invidious\.riverside\.rocks',
	332	r'(?:www\.)?invidious\.blamefran\.net',
	333	r'(?:www\.)?invidious\.moomoo\.de',
	334	r'(?:www\.)?ytb\.trom\.tf',
	335	r'(?:www\.)?yt\.cyberhost\.uk',
	336	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	337	r'(?:www\.)?qklhadlycap4cnod\.onion',
	338	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	339	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	340	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	341	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	342	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	343	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	344	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	345	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	346	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	347	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	348	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	349	r'(?:www\.)?piped\.kavin\.rocks',
	350	r'(?:www\.)?piped\.silkky\.cloud',
	351	r'(?:www\.)?piped\.tokhmi\.xyz',
	352	r'(?:www\.)?piped\.moomoo\.me',
	353	r'(?:www\.)?il\.ax',
	354	r'(?:www\.)?piped\.syncpundit\.com',
	355	r'(?:www\.)?piped\.mha\.fi',
	356	r'(?:www\.)?piped\.mint\.lgbt',
	357	r'(?:www\.)?piped\.privacy\.com\.de',
	358	)
	359
	360	def _initialize_consent(self):
	361	cookies = self._get_cookies('https://www.youtube.com/')
	362	if cookies.get('__Secure-3PSID'):
	363	return
	364	consent_id = None
	365	consent = cookies.get('CONSENT')
	366	if consent:
	367	if 'YES' in consent.value:
	368	return
	369	consent_id = self._search_regex(
	370	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	371	if not consent_id:
	372	consent_id = random.randint(100, 999)
	373	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	374
	375	def _initialize_pref(self):
	376	cookies = self._get_cookies('https://www.youtube.com/')
	377	pref_cookie = cookies.get('PREF')
	378	pref = {}
	379	if pref_cookie:
	380	try:
	381	pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
	382	except ValueError:
	383	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	384	pref.update({'hl': 'en', 'tz': 'UTC'})
	385	self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
	386
	387	def _real_initialize(self):
	388	self._initialize_pref()
	389	self._initialize_consent()
	390	self._check_login_required()
	391
	392	def _check_login_required(self):
	393	if self._LOGIN_REQUIRED and not self._cookies_passed:
	394	self.raise_login_required('Login details are needed to download this content', method='cookies')
	395
	396	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	397	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	398
	399	def _get_default_ytcfg(self, client='web'):
	400	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	401
	402	def _get_innertube_host(self, client='web'):
	403	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	404
	405	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	406	# try_get but with fallback to default ytcfg client values when present
	407	_func = lambda y: try_get(y, getter, expected_type)
	408	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	409
	410	def _extract_client_name(self, ytcfg, default_client='web'):
	411	return self._ytcfg_get_safe(
	412	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	413	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
	414
	415	def _extract_client_version(self, ytcfg, default_client='web'):
	416	return self._ytcfg_get_safe(
	417	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	418	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
	419
	420	def _select_api_hostname(self, req_api_hostname, default_client=None):
	421	return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
	422	or req_api_hostname or self._get_innertube_host(default_client or 'web'))
	423
	424	def _extract_api_key(self, ytcfg=None, default_client='web'):
	425	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
	426
	427	def _extract_context(self, ytcfg=None, default_client='web'):
	428	context = get_first(
	429	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	430	# Enforce language and tz for extraction
	431	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	432	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	433	return context
	434
	435	_SAPISID = None
	436
	437	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	438	time_now = round(time.time())
	439	if self._SAPISID is None:
	440	yt_cookies = self._get_cookies('https://www.youtube.com')
	441	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	442	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	443	sapisid_cookie = dict_get(
	444	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	445	if sapisid_cookie and sapisid_cookie.value:
	446	self._SAPISID = sapisid_cookie.value
	447	self.write_debug('Extracted SAPISID cookie')
	448	# SAPISID cookie is required if not already present
	449	if not yt_cookies.get('SAPISID'):
	450	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	451	self._set_cookie(
	452	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	453	else:
	454	self._SAPISID = False
	455	if not self._SAPISID:
	456	return None
	457	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	458	sapisidhash = hashlib.sha1(
	459	f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
	460	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	461
	462	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	463	note='Downloading API JSON', errnote='Unable to download API page',
	464	context=None, api_key=None, api_hostname=None, default_client='web'):
	465
	466	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	467	data.update(query)
	468	real_headers = self.generate_api_headers(default_client=default_client)
	469	real_headers.update({'content-type': 'application/json'})
	470	if headers:
	471	real_headers.update(headers)
	472	api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
	473	or api_key or self._extract_api_key(default_client=default_client))
	474	return self._download_json(
	475	f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
	476	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	477	data=json.dumps(data).encode('utf8'), headers=real_headers,
	478	query={'key': api_key, 'prettyPrint': 'false'})
	479
	480	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	481	return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
	482
	483	@staticmethod
	484	def _extract_session_index(*data):
	485	"""
	486	Index of current account in account list.
	487	See: https://github.com/yt-dlp/yt-dlp/pull/519
	488	"""
	489	for ytcfg in data:
	490	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	491	if session_index is not None:
	492	return session_index
	493
	494	# Deprecated?
	495	def _extract_identity_token(self, ytcfg=None, webpage=None):
	496	if ytcfg:
	497	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
	498	if token:
	499	return token
	500	if webpage:

1

import base64

import calendar

import copy

import datetime

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

20

from .openload import PhantomJSwrapper

21

from ..compat import functools

22

from ..jsinterp import JSInterpreter

23

from ..utils import (

NO_DEFAULT,

ExtractorError,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

66

INNERTUBE_CLIENTS = {

67

'web': {

68

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

69

'INNERTUBE_CONTEXT': {

70

'client': {

71

'clientName': 'WEB',

72

'clientVersion': '2.20220801.00.00',

73

}

74

},

75

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

76

},

77

'web_embedded': {

78

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

79

'INNERTUBE_CONTEXT': {

80

'client': {

81

'clientName': 'WEB_EMBEDDED_PLAYER',

82

'clientVersion': '1.20220731.00.00',

83

},

84

},

85

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

86

},

87

'web_music': {

88

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

89

'INNERTUBE_HOST': 'music.youtube.com',

90

'INNERTUBE_CONTEXT': {

91

'client': {

92

'clientName': 'WEB_REMIX',

93

'clientVersion': '1.20220727.01.00',

94

}

95

},

96

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

97

},

98

'web_creator': {

99

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

100

'INNERTUBE_CONTEXT': {

101

'client': {

102

'clientName': 'WEB_CREATOR',

103

'clientVersion': '1.20220726.00.00',

104

}

105

},

106

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

107

},

108

'android': {

109

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

110

'INNERTUBE_CONTEXT': {

111

'client': {

112

'clientName': 'ANDROID',

113

'clientVersion': '17.29.34',

114

'androidSdkVersion': 30

115

}

116

},

117

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

118

'REQUIRE_JS_PLAYER': False

119

},

120

'android_embedded': {

121

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

122

'INNERTUBE_CONTEXT': {

123

'client': {

124

'clientName': 'ANDROID_EMBEDDED_PLAYER',

125

'clientVersion': '17.29.34',

126

'androidSdkVersion': 30

127

},

128

},

129

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

130

'REQUIRE_JS_PLAYER': False

131

},

132

'android_music': {

133

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

134

'INNERTUBE_CONTEXT': {

135

'client': {

136

'clientName': 'ANDROID_MUSIC',

137

'clientVersion': '5.16.51',

138

'androidSdkVersion': 30

139

}

140

},

141

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

142

'REQUIRE_JS_PLAYER': False

143

},

144

'android_creator': {

145

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

146

'INNERTUBE_CONTEXT': {

147

'client': {

148

'clientName': 'ANDROID_CREATOR',

149

'clientVersion': '22.28.100',

150

'androidSdkVersion': 30

151

},

152

},

153

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

154

'REQUIRE_JS_PLAYER': False

155

},

156

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

157

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

158

'ios': {

159

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

160

'INNERTUBE_CONTEXT': {

161

'client': {

162

'clientName': 'IOS',

163

'clientVersion': '17.30.1',

164

'deviceModel': 'iPhone14,3',

165

}

166

},

167

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

168

'REQUIRE_JS_PLAYER': False

169

},

170

'ios_embedded': {

171

'INNERTUBE_CONTEXT': {

172

'client': {

173

'clientName': 'IOS_MESSAGES_EXTENSION',

174

'clientVersion': '17.30.1',

175

'deviceModel': 'iPhone14,3',

176

},

177

},

178

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

179

'REQUIRE_JS_PLAYER': False

180

},

181

'ios_music': {

182

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

183

'INNERTUBE_CONTEXT': {

184

'client': {

185

'clientName': 'IOS_MUSIC',

186

'clientVersion': '5.18',

187

},

188

},

189

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

190

'REQUIRE_JS_PLAYER': False

191

},

192

'ios_creator': {

193

'INNERTUBE_CONTEXT': {

194

'client': {

195

'clientName': 'IOS_CREATOR',

196

'clientVersion': '22.29.101',

197

},

198

},

199

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

200

'REQUIRE_JS_PLAYER': False

201

},

202

# mweb has 'ultralow' formats

203

# See: https://github.com/yt-dlp/yt-dlp/pull/557

204

'mweb': {

205

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

206

'INNERTUBE_CONTEXT': {

207

'client': {

208

'clientName': 'MWEB',

209

'clientVersion': '2.20220801.00.00',

210

}

211

},

212

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

213

},

214

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

215

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

216

'tv_embedded': {

217

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

218

'INNERTUBE_CONTEXT': {

219

'client': {

220

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

221

'clientVersion': '2.0',

222

},

223

},

224

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

230

variant, *base = client_name.rsplit('.', 1)

231

if base:

232

return variant, base[0], variant

233

base, *variant = client_name.split('_', 1)

234

return client_name, base, variant[0] if variant else None

235

236

237

def build_innertube_clients():

238

THIRD_PARTY = {

239

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

240

}

241

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

242

priority = qualities(BASE_CLIENTS[::-1])

243

244

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

245

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

246

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

247

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

248

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

249

250

_, base_client, variant = _split_innertube_client(client)

251

ytcfg['priority'] = 10 * priority(base_client)

252

253

if not variant:

254

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

255

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

256

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

257

embedscreen['priority'] -= 3

258

elif variant == 'embedded':

259

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

260

ytcfg['priority'] -= 2

261

else:

262

ytcfg['priority'] -= 3

263

264

265

build_innertube_clients()

266

267

268

class YoutubeBaseInfoExtractor(InfoExtractor):

269

"""Provide base functions for Youtube extractors"""

270

271

_RESERVED_NAMES = (

272

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

278

279

# _NETRC_MACHINE = 'youtube'

280

281

# If True it will raise an error if no login info is provided

282

_LOGIN_REQUIRED = False

283

284

_INVIDIOUS_SITES = (

285

# invidious-redirect websites

286

r'(?:www\.)?redirect\.invidious\.io',

287

r'(?:(?:www|dev)\.)?invidio\.us',

288

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

289

r'(?:www\.)?invidious\.pussthecat\.org',

290

r'(?:www\.)?invidious\.zee\.li',

291

r'(?:www\.)?invidious\.ethibox\.fr',

292

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

293

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

294

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

295

# youtube-dl invidious instances list

296

r'(?:(?:www|no)\.)?invidiou\.sh',

297

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

298

r'(?:www\.)?invidious\.kabi\.tk',

299

r'(?:www\.)?invidious\.mastodon\.host',

300

r'(?:www\.)?invidious\.zapashcanon\.fr',

301

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

302

r'(?:www\.)?invidious\.tinfoil-hat\.net',

303

r'(?:www\.)?invidious\.himiko\.cloud',

304

r'(?:www\.)?invidious\.reallyancient\.tech',

305

r'(?:www\.)?invidious\.tube',

306

r'(?:www\.)?invidiou\.site',

307

r'(?:www\.)?invidious\.site',

308

r'(?:www\.)?invidious\.xyz',

309

r'(?:www\.)?invidious\.nixnet\.xyz',

310

r'(?:www\.)?invidious\.048596\.xyz',

311

r'(?:www\.)?invidious\.drycat\.fr',

312

r'(?:www\.)?inv\.skyn3t\.in',

313

r'(?:www\.)?tube\.poal\.co',

314

r'(?:www\.)?tube\.connect\.cafe',

315

r'(?:www\.)?vid\.wxzm\.sx',

316

r'(?:www\.)?vid\.mint\.lgbt',

317

r'(?:www\.)?vid\.puffyan\.us',

318

r'(?:www\.)?yewtu\.be',

319

r'(?:www\.)?yt\.elukerio\.org',

320

r'(?:www\.)?yt\.lelux\.fi',

321

r'(?:www\.)?invidious\.ggc-project\.de',

322

r'(?:www\.)?yt\.maisputain\.ovh',

323

r'(?:www\.)?ytprivate\.com',

324

r'(?:www\.)?invidious\.13ad\.de',

325

r'(?:www\.)?invidious\.toot\.koeln',

326

r'(?:www\.)?invidious\.fdn\.fr',

327

r'(?:www\.)?watch\.nettohikari\.com',

328

r'(?:www\.)?invidious\.namazso\.eu',

329

r'(?:www\.)?invidious\.silkky\.cloud',

330

r'(?:www\.)?invidious\.exonip\.de',

331

r'(?:www\.)?invidious\.riverside\.rocks',

332

r'(?:www\.)?invidious\.blamefran\.net',

333

r'(?:www\.)?invidious\.moomoo\.de',

334

r'(?:www\.)?ytb\.trom\.tf',

335

r'(?:www\.)?yt\.cyberhost\.uk',

336

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

337

r'(?:www\.)?qklhadlycap4cnod\.onion',

338

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

339

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

340

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

341

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

342

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

343

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

344

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

345

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

346

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

347

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

348

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

349

r'(?:www\.)?piped\.kavin\.rocks',

350

r'(?:www\.)?piped\.silkky\.cloud',

351

r'(?:www\.)?piped\.tokhmi\.xyz',

352

r'(?:www\.)?piped\.moomoo\.me',

353

r'(?:www\.)?il\.ax',

354

r'(?:www\.)?piped\.syncpundit\.com',

355

r'(?:www\.)?piped\.mha\.fi',

356

r'(?:www\.)?piped\.mint\.lgbt',

357

r'(?:www\.)?piped\.privacy\.com\.de',

358

)

359

360

def _initialize_consent(self):

361

cookies = self._get_cookies('https://www.youtube.com/')

362

if cookies.get('__Secure-3PSID'):

363

return

364

consent_id = None

365

consent = cookies.get('CONSENT')

366

if consent:

367

if 'YES' in consent.value:

368

return

369

consent_id = self._search_regex(

370

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

371

if not consent_id:

372

consent_id = random.randint(100, 999)

373

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

374

375

def _initialize_pref(self):

376

cookies = self._get_cookies('https://www.youtube.com/')

377

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(urllib.parse.parse_qsl(pref_cookie.value))

382

except ValueError:

383

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

384

pref.update({'hl': 'en', 'tz': 'UTC'})

385

self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))

386

387

def _real_initialize(self):

388

self._initialize_pref()

389

self._initialize_consent()

390

self._check_login_required()

391

392

def _check_login_required(self):

393

if self._LOGIN_REQUIRED and not self._cookies_passed:

394

self.raise_login_required('Login details are needed to download this content', method='cookies')

395

396

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

397

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

398

399

def _get_default_ytcfg(self, client='web'):

400

return copy.deepcopy(INNERTUBE_CLIENTS[client])

401

402

def _get_innertube_host(self, client='web'):

403

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

404

405

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

406

# try_get but with fallback to default ytcfg client values when present

407

_func = lambda y: try_get(y, getter, expected_type)

408

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

409

410

def _extract_client_name(self, ytcfg, default_client='web'):

411

return self._ytcfg_get_safe(

412

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

413

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)

414

415

def _extract_client_version(self, ytcfg, default_client='web'):
    """Look up the client version (a str) via _ytcfg_get_safe.

    Tries the top-level 'INNERTUBE_CLIENT_VERSION' key first, then
    INNERTUBE_CONTEXT -> client -> clientVersion.
    """
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)

419

420

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API hostname.

    Precedence: the 'innertube_host' configuration argument, then
    `req_api_hostname`, then the built-in host of `default_client`
    ('web' when no client is given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

423

424

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Look up 'INNERTUBE_API_KEY' (a str) via _ytcfg_get_safe."""
    def read_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, read_key, str, default_client)

426

427

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the 'INNERTUBE_CONTEXT' dict with English / UTC forced on its client section.

    The context is taken from `ytcfg` when available, otherwise from the
    default config of `default_client`. The 'client' sub-dict is updated in
    place, so a context taken from `ytcfg` is modified for the caller too.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section['hl'] = 'en'
    client_section['timeZone'] = 'UTC'
    client_section['utcOffsetMinutes'] = 0
    return context

# Cached SAPISID value: None = not looked up yet, False = no usable cookie found
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the 'SAPISIDHASH <time>_<sha1>' authorization value, or None without a SAPISID.

    The SAPISID is read from the youtube.com cookies on first use and cached
    on self._SAPISID.
    """
    now = round(time.time())
    if self._SAPISID is None:
        jar = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
        if not (cookie and cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not jar.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=now + 3600)
    if not self._SAPISID:
        return None
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{now} {self._SAPISID} {origin}'
    digest = hashlib.sha1(payload.encode()).hexdigest()
    return f'SAPISIDHASH {now}_{digest}'

461

462

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` (merged with the client context) to /youtubei/v1/<ep> and return the JSON.

    The API key is taken from the 'innertube_key' configuration argument,
    then `api_key`, then the default key of `default_client`. Entries in
    `headers` override the generated API headers.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    host = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{host}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': key, 'prettyPrint': 'false'})

479

480

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON that follows _YT_INITIAL_DATA_RE, via _search_json."""
    pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(pattern, webpage, 'yt initial data', item_id, fatal=fatal)

482

483

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts
    to an int, or None when there is none.
    """
    indices = (int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX'])) for cfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from ytcfg['ID_TOKEN'], else from an ID_TOKEN entry in `webpage`.

    Returns None when neither source yields a token.
    """
    token = try_get(ytcfg, lambda cfg: cfg['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

504

505

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    for data in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated
        datasync_id = try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), str) or ''
        parts = datasync_id.split('||')
        if len(parts) < 2 or not parts[1]:
            continue
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return parts[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations, tried against each of the given objects
    locations = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [locations], expected_type=str)

533

534

@functools.cached_property
def is_authenticated(self):
    """True when _generate_sapisidhash_header() yields a truthy value (cached after first access)."""
    auth_header = self._generate_sapisidhash_header()
    return True if auth_header else False

537

538

def extract_ytcfg(self, video_id, webpage):
    """Parse the object literal passed to `ytcfg.set(...)` in `webpage`.

    @param video_id  identifier forwarded to _parse_json
    @param webpage   page source; may be empty or None
    @returns         the parsed dict, or {} when the page is empty, no
                     `ytcfg.set({...});` call is found, or parsing fails
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped; an unescaped `$` here
    # anchors at end-of-string and the pattern can never match.
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_config, video_id, fatal=False) or {}

545

546

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Assemble the HTTP headers for an API request.

    Explicit keyword values take precedence over values extracted from
    `ytcfg`. Headers whose value ends up as None are left out of the result.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    candidates = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Fall back to account 0 when only a sync id is known
        candidates['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        candidates.update({'Authorization': authorization, 'X-Origin': origin})

    return {name: value for name, value in candidates.items() if value is not None}

570

571

def _download_ytcfg(self, client, video_id):
    """Download the page for `client` and return its ytcfg, or {} if unavailable.

    Only the 'web', 'web_music' and 'web_embedded' clients have a page to
    fetch; any other client yields {} without a request.
    """
    client_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    page_url = client_pages.get(client)
    if not page_url:
        return {}
    label = client.replace('_', ' ').strip()
    webpage = self._download_webpage(
        page_url, video_id, fatal=False, note=f'Downloading {label} client config')
    return self.extract_ytcfg(video_id, webpage) or {}

582

583

@staticmethod
def _build_api_continuation_query(continuation, ctp=None):
    """Build the query dict for a continuation request.

    A truthy `ctp` is added under clickTracking -> clickTrackingParams.
    """
    # TODO: Inconsistency with clickTrackingParams.
    # Currently we have a fixed ctp contained within context (from ytcfg)
    # and a ctp in root query for continuation.
    if not ctp:
        return {'continuation': continuation}
    return {
        'continuation': continuation,
        'clickTracking': {'clickTrackingParams': ctp},
    }

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from next/reload continuation data in `renderer`.

    Returns None when no such data, or no continuation token, is present.
    """
    info = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = info.get('continuation') if info else None
    if not token:
        return
    return cls._build_api_continuation_query(token, info.get('clickTrackingParams'))

607

608

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or carries no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

617

618

@classmethod
def _extract_continuation(cls, renderer):
    """Find a continuation query for `renderer`.

    Next/reload continuation data on the renderer itself is preferred;
    otherwise the entries under 'contents' and 'items' are scanned for a
    continuationItemRenderer. Returns None when nothing is found.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in (try_get(renderer, lambda x, key=key: x[key], list) or [])
    ]

    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(
            entry, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                    lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
            dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query

@classmethod
def _extract_alerts(cls, data):
    """Yield (alert_type, message) for every alert under data['alerts'] that has both a type and text."""
    alert_groups = try_get(data, lambda x: x['alerts'], list) or []
    for group in alert_groups:
        if isinstance(group, dict):
            for alert in group.values():
                kind = alert.get('type')
                # Text is only looked up for alerts that carry a type
                message = cls._get_text(alert, 'text') if kind else None
                if message:
                    yield kind, message

651

652

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report (type, message) alerts as warnings and raise on the last fatal error.

    With `fatal` set, alerts of type 'error' (case-insensitive) are treated
    as errors: all but the last are reported as warnings and the last one is
    raised as ExtractorError. Everything else is reported as a warning.
    """
    fatal_errors, notices = [], []
    for kind, message in alerts:
        is_error = kind.lower() == 'error'
        bucket = fatal_errors if is_error and fatal else notices
        bucket.append([kind, message])

    for kind, message in notices + fatal_errors[:-1]:
        self.report_warning(f'YouTube said: {kind} - {message}', only_once=only_once)
    if fatal_errors:
        raise ExtractorError('YouTube said: %s' % fatal_errors[-1][1], expected=expected)

665

666

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and pass them, with any extra arguments, to _report_alerts."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

668

669

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels found under renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of the given paths, else None.

    For each candidate object the 'simpleText' string is preferred; otherwise
    the 'text' of its 'runs' (or of the object itself when it is a list) are
    joined, limited to the first `max_runs` runs when that is set. With no
    paths, `data` itself is the candidate.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only branching paths make traverse_obj hand back several results;
            # otherwise wrap the single result so the loop below is uniform
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]
        for candidate in candidates:
            simple = try_get(candidate, lambda x: x['simpleText'], str)
            if simple:
                return simple
            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined

def _get_count(self, data, *path_list):
    """Parse a count from the text at the given paths.

    parse_count is tried first; if it yields None, the leading run of digits
    and commas (after removing all whitespace) is converted with str_to_int.
    """
    raw_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(raw_text)
    if parsed is not None:
        return parsed
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', raw_text), 'count', default=None)
    return str_to_int(leading_digits)

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of {'url', 'height', 'width'} dicts; entries without a valid URL are skipped
    """
    found = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            link = url_or_none(entry.get('url'))
            if link:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in link:
                    link = link.partition('?')[0]
                found.append({
                    'url': link,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return found

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    @returns the result of datetime_from_str for the matched expression, or
             None when the text holds no relative time or it cannot be converted
    """
    # Either a keyword ('today', 'yesterday', 'now') or '<N> <unit>[s] ago'
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if mobj:
        start = mobj.group('start')
        if start:
            return datetime_from_str(start)
        try:
            return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
        except ValueError:
            return None

def _extract_time_text(self, renderer, *path_list):
    """@returns (timestamp, time_text)

    The text is first read as a relative time; failing that it is parsed as
    a date, either whole or from a date-like fragment of the lower-cased
    text. A warning is reported when non-empty text yields no timestamp.
    """
    text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(text)
    timestamp = (
        calendar.timegm(relative.timetuple())
        if isinstance(relative, datetime.datetime) else None)

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_fragment = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_fragment)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text

765

766

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API through self.RetryManager() and return the response.

    An attempt is marked as failed (retry.error is set) on non-HTTP network
    errors, on HTTP errors other than 403/429, on alerts containing
    'unknown error', and when `check_get_keys` cannot be found in the
    response. All other failures go to _error_or_warning(e, fatal=fatal).
    """
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            cause = err.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(cause, urllib.error.HTTPError):
                retry.error = err
                continue

            head = cause.read(512)
            if not is_html(head):
                # A non-HTML error body may carry a JSON error message from the API
                body = self._webpage_read_content(cause, None, item_id, prefix=head) or '{}'
                api_message = try_get(
                    self._parse_json(body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if api_message:
                    self._report_alerts([('ERROR', api_message)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.code in (403, 429):
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' not in err.msg.lower():
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if traverse_obj(response, *variadic(check_get_keys)):
            return response
        retry.error = ExtractorError('Incomplete data received', expected=True)

@staticmethod
def is_music_url(url):
    """Return True when `url` starts with http(s)://music.youtube.com/."""
    music_prefix = r'https?://music\.youtube\.com/'
    return bool(re.match(music_prefix, url))

821

822

def _extract_video(self, renderer):
    """Turn a video renderer dict into a 'url'-type result pointing at YoutubeIE.

    The URL is the /shorts/ form when the overlay style is 'SHORTS' or the
    navigation URL contains '/shorts/', otherwise the /watch?v= form.
    'upload_date' is only filled in when the 'approximate_date'
    configuration argument of youtubetab is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # Fall back to the duration embedded in the accessibility label of the title
        a11y_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            a11y_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_path = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', navigation_path) or ''

    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    ie_key = YoutubeIE.ie_key()

    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')
    else:
        upload_date = None

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': ie_key,
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

879

IE_DESC = 'YouTube'

880

_VALID_URL = r"""(?x)^

881

(

882

(?:https?://|//) # http(s):// or protocol-independent URL

883

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

884

(?:www\.)?deturl\.com/www\.youtube\.com|

885

(?:www\.)?pwnyoutube\.com|

886

(?:www\.)?hooktube\.com|

887

(?:www\.)?yourepeat\.com|

888

tube\.majestyc\.net|

889

%(invidious)s|

890

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

891

(?:.*?\#/)? # handle anchor (#/) redirect urls

892

(?: # the various things that can precede the ID:

893

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

894

|(?: # or the v= param in all its forms

895

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

896

(?:\?|\#!?) # the params delimiter ? or # or #!

897

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

903

vid\.plus| # or vid.plus/xxxx

904

zwearz\.com/watch| # or zwearz.com/watch/xxxx

905

%(invidious)s

906

)/

907

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

908

)

909

)? # all until now is optional -> you can pass the naked ID

910

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

911

(?(1).+)? # if we found the ID, everything can follow

912

(?:\#|$)""" % {

913

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

914

}

915

_EMBED_REGEX = [r'''(?x)

(?:

<iframe[^>]+?src=|

data-video-url=|

<embed[^>]+?src=|

embedSWF\(?:\s*|

<object[^>]+data=|

new\s+SWFObject\(

)

(["\'])

(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/

926

(?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)

927

\1''']

928

_PLAYER_INFO_RE = (

929

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

930

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

931

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

932

)

933

_formats = {

934

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

935

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

936

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

937

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

938

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

939

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

940

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

941

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

942

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

943

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

944

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

945

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

946

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

947

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

948

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

949

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

950

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

951

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

956

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

957

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

958

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

959

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

960

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

961

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

962

963

# Apple HTTP Live Streaming

964

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

965

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

966

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

967

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

968

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

969

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

970

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

971

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

972

973

# DASH mp4 video

974

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

975

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

976

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

977

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

978

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

979

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

980

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

981

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

982

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

983

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

984

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

985

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

986

987

# Dash mp4 audio

988

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

989

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

990

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

991

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

992

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

993

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

994

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

995

996

# Dash webm

997

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

998

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

999

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1000

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1001

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1002

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1003

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1004

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1005

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1006

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1007

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1008

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1009

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1010

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1011

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1012

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1013

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1014

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1015

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1016

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1017

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1018

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1019

1020

# Dash webm audio

1021

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1022

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1023

1024

# Dash webm audio with opus inside

1025

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1026

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1027

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1028

1029

# RTMP (unnamed)

1030

'_rtmp': {'protocol': 'rtmp'},

1031

1032

# av01 video only formats sometimes served with "unknown" codecs

1033

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1034

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1035

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1036

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1037

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1038

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1039

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1040

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1041

}

1042

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1054

'uploader': 'Philipp Hagemeister',

1055

'uploader_id': 'phihag',

1056

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1057

'channel': 'Philipp Hagemeister',

1058

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1059

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1060

'upload_date': '20121002',

1061

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1062

'categories': ['Science & Technology'],

1063

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1068

'playable_in_embed': True,

1069

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1070

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1075

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1080

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1085

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1086

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1087

'uploader': 'SET India',

1088

'uploader_id': 'setindia',

1089

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1090

'age_limit': 18,

1091

},

1092

'skip': 'Private video',

1093

},

1094

{

1095

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1096

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1101

'uploader': 'Philipp Hagemeister',

1102

'uploader_id': 'phihag',

1103

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1104

'channel': 'Philipp Hagemeister',

1105

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1106

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1107

'upload_date': '20121002',

1108

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1109

'categories': ['Science & Technology'],

1110

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1115

'playable_in_embed': True,

1116

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1117

'live_status': 'not_live',

1118

'age_limit': 0,

1119

'comment_count': int,

1120

'channel_follower_count': int

1121

},

1122

'params': {

1123

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1128

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1133

'uploader_id': '8KVIDEO',

1134

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1135

'description': '',

1136

'uploader': '8KVIDEO',

1137

'title': 'UHDTV TEST 8K VIDEO.mp4'

1138

},

1139

'params': {

1140

'youtube_include_dash_manifest': True,

1141

'format': '141',

1142

},

1143

'skip': 'format 141 not served anymore',

1144

},

1145

# DASH manifest with encrypted signature

1146

{

1147

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1152

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1153

'duration': 244,

1154

'uploader': 'AfrojackVEVO',

1155

'uploader_id': 'AfrojackVEVO',

1156

'upload_date': '20131011',

1157

'abr': 129.495,

1158

'like_count': int,

1159

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1160

'playable_in_embed': True,

1161

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1162

'view_count': int,

1163

'track': 'The Spark',

1164

'live_status': 'not_live',

1165

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1166

'channel': 'Afrojack',

1167

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1168

'tags': 'count:19',

1169

'availability': 'public',

1170

'categories': ['Music'],

1171

'age_limit': 0,

1172

'alt_title': 'The Spark',

1173

'channel_follower_count': int

1174

},

1175

'params': {

1176

'youtube_include_dash_manifest': True,

1177

'format': '141/bestaudio[ext=m4a]',

1178

},

1179

},

1180

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1181

{

1182

'note': 'Embed allowed age-gate video',

1183

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1188

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1189

'duration': 142,

1190

'uploader': 'The Witcher',

1191

'uploader_id': 'WitcherGame',

1192

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1193

'upload_date': '20140605',

1194

'age_limit': 18,

1195

'categories': ['Gaming'],

1196

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1197

'availability': 'needs_auth',

1198

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1199

'like_count': int,

1200

'channel': 'The Witcher',

1201

'live_status': 'not_live',

1202

'tags': 'count:17',

1203

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1204

'playable_in_embed': True,

1205

'view_count': int,

1206

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1211

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1216

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1217

'upload_date': '20200408',

1218

'uploader_id': 'FlyingKitty900',

1219

'uploader': 'FlyingKitty',

1220

'age_limit': 18,

1221

'availability': 'needs_auth',

1222

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1223

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1224

'channel': 'FlyingKitty',

1225

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1226

'view_count': int,

1227

'categories': ['Entertainment'],

1228

'live_status': 'not_live',

1229

'tags': ['Flyingkitty', 'godzilla 2'],

1230

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1231

'like_count': int,

1232

'duration': 177,

1233

'playable_in_embed': True,

1234

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1239

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1240

'info_dict': {

1241

'id': 'Tq92D6wQ1mg',

1242

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1243

'ext': 'mp4',

1244

'upload_date': '20191228',

1245

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1246

'uploader': 'Projekt Melody',

1247

'description': 'md5:17eccca93a786d51bc67646756894066',

1248

'age_limit': 18,

1249

'like_count': int,

1250

'availability': 'needs_auth',

1251

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1252

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1253

'view_count': int,

1254

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1255

'channel': 'Projekt Melody',

1256

'live_status': 'not_live',

1257

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1258

'playable_in_embed': True,

1259

'categories': ['Entertainment'],

1260

'duration': 106,

1261

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1262

'comment_count': int,

1263

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1268

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1273

'uploader': 'Herr Lurik',

1274

'uploader_id': 'st3in234',

1275

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1276

'upload_date': '20130730',

1277

'track': 'Such mich find mich',

1278

'age_limit': 0,

1279

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1280

'like_count': int,

1281

'playable_in_embed': False,

1282

'creator': 'OOMPH!',

1283

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1284

'view_count': int,

1285

'alt_title': 'Such mich find mich',

1286

'duration': 210,

1287

'channel': 'Herr Lurik',

1288

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1289

'categories': ['Music'],

1290

'availability': 'public',

1291

'uploader_url': 'http://www.youtube.com/user/st3in234',

1292

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1293

'live_status': 'not_live',

1294

'artist': 'OOMPH!',

1295

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1300

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1301

'only_matching': True,

1302

},

1303

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1304

# YouTube Red ad is not captured for creator

1305

{

1306

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1312

'uploader_id': 'deadmau5',

1313

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1314

'creator': 'deadmau5',

1315

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1316

'uploader': 'deadmau5',

1317

'title': 'Deadmau5 - Some Chords (HD)',

1318

'alt_title': 'Some Chords',

1319

'availability': 'public',

1320

'tags': 'count:14',

1321

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1322

'view_count': int,

1323

'live_status': 'not_live',

1324

'channel': 'deadmau5',

1325

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1326

'like_count': int,

1327

'track': 'Some Chords',

1328

'artist': 'deadmau5',

1329

'playable_in_embed': True,

1330

'age_limit': 0,

1331

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1332

'categories': ['Music'],

1333

'album': 'Some Chords',

1334

'channel_follower_count': int

1335

},

1336

'expected_warnings': [

1337

'DASH manifest missing',

1338

]

1339

},

1340

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1341

{

1342

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1348

'uploader_id': 'olympic',

1349

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1350

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1351

'uploader': 'Olympics',

1352

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1353

'like_count': int,

1354

'release_timestamp': 1343767800,

1355

'playable_in_embed': True,

1356

'categories': ['Sports'],

1357

'release_date': '20120731',

1358

'channel': 'Olympics',

1359

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1360

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1361

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1362

'age_limit': 0,

1363

'availability': 'public',

1364

'live_status': 'was_live',

1365

'view_count': int,

1366

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1367

'channel_follower_count': int

1368

},

1369

'params': {

1370

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1380

'duration': 85,

1381

'upload_date': '20110310',

1382

'uploader_id': 'AllenMeow',

1383

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1384

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1385

'uploader': '孫ᄋᄅ',

1386

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1387

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1392

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1393

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1394

'view_count': int,

1395

'categories': ['People & Blogs'],

1396

'like_count': int,

1397

'live_status': 'not_live',

1398

'availability': 'unlisted',

1399

'comment_count': int,

1400

'channel_follower_count': int

1401

},

1402

},

1403

# url_encoded_fmt_stream_map is empty string

1404

{

1405

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1410

'description': '',

1411

'upload_date': '20150404',

1412

'uploader_id': 'spbelect',

1413

'uploader': 'Наблюдатели Петербурга',

1414

},

1415

'params': {

1416

'skip_download': 'requires avconv',

1417

},

1418

'skip': 'This live event has ended.',

1419

},

1420

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1421

{

1422

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1427

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1428

'duration': 220,

1429

'upload_date': '20150625',

1430

'uploader_id': 'dorappi2000',

1431

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1432

'uploader': 'dorappi2000',

1433

'formats': 'mincount:31',

1434

},

1435

'skip': 'not actual anymore',

1436

},

1437

# DASH manifest with segment_list

1438

{

1439

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1440

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1445

'uploader': 'Airtek',

1446

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1447

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1448

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1449

},

1450

'params': {

1451

'youtube_include_dash_manifest': True,

1452

'format': '135', # bestvideo

1453

},

1454

'skip': 'This live event has ended.',

1455

},

1456

{

1457

# Multifeed videos (multiple cameras), URL is for Main Camera

1458

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1459

'info_dict': {

1460

'id': 'jvGDaLqkpTg',

1461

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1462

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1469

'description': 'md5:e03b909557865076822aa169218d6a5d',

1470

'duration': 10643,

1471

'upload_date': '20161111',

1472

'uploader': 'Team PGP',

1473

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1474

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1481

'description': 'md5:e03b909557865076822aa169218d6a5d',

1482

'duration': 10991,

1483

'upload_date': '20161111',

1484

'uploader': 'Team PGP',

1485

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1486

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1493

'description': 'md5:e03b909557865076822aa169218d6a5d',

1494

'duration': 10995,

1495

'upload_date': '20161111',

1496

'uploader': 'Team PGP',

1497

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1498

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1505

'description': 'md5:e03b909557865076822aa169218d6a5d',

1506

'duration': 10990,

1507

'upload_date': '20161111',

1508

'uploader': 'Team PGP',

1509

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1510

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1515

},

1516

'skip': 'Not multifeed anymore',

1517

},

1518

{

1519

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1520

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1521

'info_dict': {

1522

'id': 'gVfLd0zydlo',

1523

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1524

},

1525

'playlist_count': 2,

1526

'skip': 'Not multifeed anymore',

1527

},

1528

{

1529

'url': 'https://vid.plus/FlRa-iH7PGw',

1530

'only_matching': True,

1531

},

1532

{

1533

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1534

'only_matching': True,

1535

},

1536

{

1537

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1538

# Also tests cut-off URL expansion in video description (see

1539

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1540

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1541

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1546

'alt_title': 'Dark Walk',

1547

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1548

'duration': 133,

1549

'upload_date': '20151119',

1550

'uploader_id': 'IronSoulElf',

1551

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1552

'uploader': 'IronSoulElf',

1553

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1554

'track': 'Dark Walk',

1555

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1556

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1557

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1558

'categories': ['Film & Animation'],

1559

'view_count': int,

1560

'live_status': 'not_live',

1561

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1562

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1563

'tags': 'count:13',

1564

'availability': 'public',

1565

'channel': 'IronSoulElf',

1566

'playable_in_embed': True,

1567

'like_count': int,

1568

'age_limit': 0,

1569

'channel_follower_count': int

1570

},

1571

'params': {

1572

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1577

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1578

'only_matching': True,

1579

},

1580

{

1581

# Video with yt:stretch=17:0

1582

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1587

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1588

'upload_date': '20151107',

1589

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1590

'uploader': 'CH GAMER DROID',

1591

},

1592

'params': {

1593

'skip_download': True,

1594

},

1595

'skip': 'This video does not exist.',

1596

},

1597

{

1598

# Video with incomplete 'yt:stretch=16:'

1599

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1600

'only_matching': True,

1601

},

1602

{

1603

# Video licensed under Creative Commons

1604

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1609

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1610

'duration': 721,

1611

'upload_date': '20150128',

1612

'uploader_id': 'BerkmanCenter',

1613

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1614

'uploader': 'The Berkman Klein Center for Internet & Society',

1615

'license': 'Creative Commons Attribution license (reuse allowed)',

1616

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1617

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1618

'like_count': int,

1619

'age_limit': 0,

1620

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1621

'channel': 'The Berkman Klein Center for Internet & Society',

1622

'availability': 'public',

1623

'view_count': int,

1624

'categories': ['Education'],

1625

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1626

'live_status': 'not_live',

1627

'playable_in_embed': True,

1628

'comment_count': int,

1629

'channel_follower_count': int

1630

},

1631

'params': {

1632

'skip_download': True,

},

},

{

# Channel-like uploader_url

1637

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1642

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1643

'duration': 4060,

1644

'upload_date': '20151120',

1645

'uploader': 'Bernie Sanders',

1646

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1647

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1648

'license': 'Creative Commons Attribution license (reuse allowed)',

1649

'playable_in_embed': True,

1650

'tags': 'count:12',

1651

'like_count': int,

1652

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1653

'age_limit': 0,

1654

'availability': 'public',

1655

'categories': ['News & Politics'],

1656

'channel': 'Bernie Sanders',

1657

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1658

'view_count': int,

1659

'live_status': 'not_live',

1660

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1661

'comment_count': int,

1662

'channel_follower_count': int

1663

},

1664

'params': {

1665

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1670

'only_matching': True,

1671

},

1672

{

1673

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1674

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1675

'only_matching': True,

1676

},

1677

{

1678

# Rental video preview

1679

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1684

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1685

'upload_date': '20150811',

1686

'uploader': 'FlixMatrix',

1687

'uploader_id': 'FlixMatrixKaravan',

1688

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1689

'license': 'Standard YouTube License',

1690

},

1691

'params': {

1692

'skip_download': True,

1693

},

1694

'skip': 'This video is not available.',

1695

},

1696

{

1697

# YouTube Red video with episode data

1698

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1703

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1704

'duration': 2085,

1705

'upload_date': '20170118',

1706

'uploader': 'Vsauce',

1707

'uploader_id': 'Vsauce',

1708

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1709

'series': 'Mind Field',

1710

'season_number': 1,

1711

'episode_number': 1,

1712

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1713

'tags': 'count:12',

1714

'view_count': int,

1715

'availability': 'public',

1716

'age_limit': 0,

1717

'channel': 'Vsauce',

1718

'episode': 'Episode 1',

1719

'categories': ['Entertainment'],

1720

'season': 'Season 1',

1721

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1722

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1723

'like_count': int,

1724

'playable_in_embed': True,

1725

'live_status': 'not_live',

1726

'channel_follower_count': int

1727

},

1728

'params': {

1729

'skip_download': True,

1730

},

1731

'expected_warnings': [

1732

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1737

# as inappropriate or offensive to some audiences.

1738

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1743

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1744

'duration': 965,

1745

'upload_date': '20140124',

1746

'uploader': 'New Century Foundation',

1747

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1748

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1749

},

1750

'params': {

1751

'skip_download': True,

1752

},

1753

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1758

'only_matching': True,

1759

},

1760

{

1761

# geo restricted to JP

1762

'url': 'sJL6WA-aGkQ',

1763

'only_matching': True,

1764

},

1765

{

1766

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1767

'only_matching': True,

1768

},

1769

{

1770

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1771

'only_matching': True,

1772

},

1773

{

1774

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1775

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1776

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1781

'only_matching': True,

1782

},

1783

{

1784

# Video with unsupported adaptive stream type formats

1785

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1790

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1791

'duration': 433,

1792

'upload_date': '20130923',

1793

'uploader': 'Amelia Putri Harwita',

1794

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1795

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1796

'formats': 'maxcount:10',

1797

},

1798

'params': {

1799

'skip_download': True,

1800

'youtube_include_dash_manifest': False,

1801

},

1802

'skip': 'not actual anymore',

1803

},

1804

{

1805

# YouTube Music auto-generated description

1806

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1811

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1812

'upload_date': '20190312',

1813

'uploader': 'Stephen - Topic',

1814

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1815

'artist': 'Stephen',

1816

'track': 'Voyeur Girl',

1817

'album': 'it\'s too much love to know my dear',

1818

'release_date': '20190313',

1819

'release_year': 2019,

1820

'alt_title': 'Voyeur Girl',

1821

'view_count': int,

1822

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1823

'playable_in_embed': True,

1824

'like_count': int,

1825

'categories': ['Music'],

1826

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1827

'channel': 'Stephen',

1828

'availability': 'public',

1829

'creator': 'Stephen',

1830

'duration': 169,

1831

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1832

'age_limit': 0,

1833

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1834

'tags': 'count:11',

1835

'live_status': 'not_live',

1836

'channel_follower_count': int

1837

},

1838

'params': {

1839

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1844

'only_matching': True,

1845

},

1846

{

1847

# invalid -> valid video id redirection

1848

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1853

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1854

'upload_date': '20090125',

1855

'uploader': 'Prochorowka',

1856

'uploader_id': 'Prochorowka',

1857

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1858

'artist': 'Panjabi MC',

1859

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1860

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1861

},

1862

'params': {

1863

'skip_download': True,

1864

},

1865

'skip': 'Video unavailable',

1866

},

1867

{

1868

# empty description results in an empty string

1869

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1876

'uploader_id': 'ElevageOrVert',

1877

'uploader': 'ElevageOrVert',

1878

'view_count': int,

1879

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1880

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1881

'like_count': int,

1882

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1883

'tags': [],

1884

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1885

'availability': 'public',

1886

'age_limit': 0,

1887

'categories': ['Pets & Animals'],

1888

'duration': 7,

1889

'playable_in_embed': True,

1890

'live_status': 'not_live',

1891

'channel': 'ElevageOrVert',

1892

'channel_follower_count': int

1893

},

1894

'params': {

1895

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1900

# see [2] for an example with '};' inside ytInitialPlayerResponse

1901

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1902

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1903

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1908

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1909

'upload_date': '20130831',

1910

'uploader_id': 'kudvenkat',

1911

'uploader': 'kudvenkat',

1912

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1913

'like_count': int,

1914

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1915

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1916

'live_status': 'not_live',

1917

'categories': ['Education'],

1918

'availability': 'public',

1919

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1920

'tags': 'count:12',

1921

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1926

'comment_count': int,

1927

'channel_follower_count': int

1928

},

1929

'params': {

1930

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1935

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1936

'only_matching': True,

1937

},

1938

{

1939

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1940

'only_matching': True,

1941

},

1942

{

1943

# https://github.com/ytdl-org/youtube-dl/pull/28094

1944

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1950

'upload_date': '20141120',

1951

'uploader': 'The Cinematic Orchestra - Topic',

1952

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1953

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1954

'artist': 'The Cinematic Orchestra',

1955

'track': 'Burn Out',

1956

'album': 'Every Day',

1957

'like_count': int,

1958

'live_status': 'not_live',

1959

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1964

'creator': 'The Cinematic Orchestra',

1965

'channel': 'The Cinematic Orchestra',

1966

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1967

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1968

'availability': 'public',

1969

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1970

'categories': ['Music'],

1971

'playable_in_embed': True,

1972

'channel_follower_count': int

1973

},

1974

'params': {

1975

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1980

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1981

'only_matching': True,

1982

},

1983

{

1984

# controversial video, requires bpctr/contentCheckOk

1985

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1990

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1991

'uploader': 'CBS Mornings',

1992

'uploader_id': 'CBSThisMorning',

1993

'upload_date': '20140716',

1994

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1995

'duration': 170,

1996

'categories': ['News & Politics'],

1997

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1998

'view_count': int,

1999

'channel': 'CBS Mornings',

2000

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2001

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2002

'age_limit': 18,

2003

'availability': 'needs_auth',

2004

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2005

'like_count': int,

2006

'live_status': 'not_live',

2007

'playable_in_embed': True,

2008

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2013

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2018

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2019

'upload_date': '20201120',

2020

'uploader': 'Walk around Japan',

2021

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2022

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2023

'duration': 1456,

2024

'categories': ['Travel & Events'],

2025

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2026

'view_count': int,

2027

'channel': 'Walk around Japan',

2028

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2029

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2030

'age_limit': 0,

2031

'availability': 'public',

2032

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2033

'live_status': 'not_live',

2034

'playable_in_embed': True,

2035

'channel_follower_count': int

2036

},

2037

'params': {

2038

'skip_download': True,

2039

},

2040

}, {

2041

# Has multiple audio streams

2042

'url': 'WaOKSUlf4TM',

2043

'only_matching': True

2044

}, {

2045

# Requires Premium: has format 141 when requested using YTM url

2046

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2047

'only_matching': True

2048

}, {

2049

# multiple subtitles with same lang_code

2050

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2051

'only_matching': True,

2052

}, {

2053

# Force use android client fallback

2054

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2055

'info_dict': {

2056

'id': 'YOelRv7fMxY',

2057

'title': 'DIGGING A SECRET TUNNEL Part 1',

2058

'ext': '3gp',

2059

'upload_date': '20210624',

2060

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2061

'uploader': 'colinfurze',

2062

'uploader_id': 'colinfurze',

2063

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2064

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2065

'duration': 596,

2066

'categories': ['Entertainment'],

2067

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2068

'view_count': int,

2069

'channel': 'colinfurze',

2070

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2071

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2072

'age_limit': 0,

2073

'availability': 'public',

2074

'like_count': int,

2075

'live_status': 'not_live',

2076

'playable_in_embed': True,

2077

'channel_follower_count': int

2078

},

2079

'params': {

2080

'format': '17', # 3gp format available on android

2081

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2086

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2087

'only_matching': True,

2088

'params': {

2089

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2094

'only_matching': True,

2095

}, {

2096

'note': 'Storyboards',

2097

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2103

'uploader_id': 'scishow',

2104

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2105

'upload_date': '20140324',

2106

'uploader': 'SciShow',

2107

'like_count': int,

2108

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2109

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2110

'view_count': int,

2111

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2112

'playable_in_embed': True,

2113

'tags': 'count:12',

2114

'uploader_url': 'http://www.youtube.com/user/scishow',

2115

'availability': 'public',

2116

'channel': 'SciShow',

2117

'live_status': 'not_live',

2118

'duration': 248,

2119

'categories': ['Education'],

2120

'age_limit': 0,

2121

'channel_follower_count': int

2122

}, 'params': {'format': 'mhtml', 'skip_download': True}

2123

}, {

2124

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2125

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2130

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2131

'uploader': 'Leon Nguyen',

2132

'uploader_id': 'VNSXIII',

2133

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2134

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2135

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2140

'tags': 'count:23',

2141

'playable_in_embed': True,

2142

'live_status': 'not_live',

2143

'upload_date': '20220103',

2144

'like_count': int,

2145

'availability': 'public',

2146

'channel': 'Leon Nguyen',

2147

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2148

'comment_count': int,

2149

'channel_follower_count': int

2150

}

2151

}, {

2152

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2153

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2158

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2159

'uploader': 'Quackity',

2160

'uploader_id': 'QuackityHQ',

2161

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2162

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2163

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2168

'tags': 'count:26',

2169

'playable_in_embed': True,

2170

'live_status': 'not_live',

2171

'release_timestamp': 1641172509,

2172

'release_date': '20220103',

2173

'upload_date': '20220103',

2174

'like_count': int,

2175

'availability': 'public',

2176

'channel': 'Quackity',

2177

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2178

'channel_follower_count': int

2179

}

2180

},

2181

{ # continuous livestream. Microformat upload date should be preferred.

2182

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2183

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2184

'info_dict': {

2185

'id': 'kgx4WGK0oNU',

2186

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2187

'ext': 'mp4',

2188

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2189

'availability': 'public',

2190

'age_limit': 0,

2191

'release_timestamp': 1637975704,

2192

'upload_date': '20210619',

2193

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2194

'live_status': 'is_live',

2195

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2196

'uploader': '阿鲍Abao',

2197

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2198

'channel': 'Abao in Tokyo',

2199

'channel_follower_count': int,

2200

'release_date': '20211127',

2201

'tags': 'count:39',

2202

'categories': ['People & Blogs'],

2203

'like_count': int,

2204

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2205

'view_count': int,

2206

'playable_in_embed': True,

2207

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2208

},

2209

'params': {'skip_download': True}

2210

}, {

2211

# Story. Requires specific player params to work.

2212

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2217

'view_count': int,

2218

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2219

'upload_date': '20220526',

2220

'categories': ['Education'],

2221

'title': 'Story',

2222

'channel': 'IT\'S HISTORY',

2223

'description': '',

2224

'uploader_id': 'BlastfromthePast',

2225

'duration': 12,

2226

'uploader': 'IT\'S HISTORY',

2227

'playable_in_embed': True,

2228

'age_limit': 0,

2229

'live_status': 'not_live',

2230

'tags': [],

2231

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2232

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2233

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2234

},

2235

'skip': 'stories get removed after some period of time',

2236

}, {

2237

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2242

'upload_date': '20220323',

2243

'like_count': int,

2244

'availability': 'unlisted',

2245

'channel': 'nao20010128nao',

2246

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2247

'age_limit': 0,

2248

'uploader': 'nao20010128nao',

2249

'uploader_id': 'nao20010128nao',

2250

'categories': ['Music'],

2251

'view_count': int,

2252

'description': '',

2253

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2254

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2255

'live_status': 'not_live',

2256

'playable_in_embed': True,

2257

'channel_follower_count': int,

2258

'duration': 6,

2259

'tags': [],

2260

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

2261

}

2262

}, {

2263

'note': '6 channel audio',

2264

'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',

2265

'only_matching': True,

}

]

_WEBPAGE_TESTS = [

# YouTube <object> embed

2271

{

2272

'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',

2273

'md5': '873c81d308b979f0e23ee7e620b312a3',

'info_dict': {

'id': 'msN87y-iEx0',

'ext': 'mp4',

'title': 'Feynman: Mirrors FUN TO IMAGINE 6',

2278

'upload_date': '20080526',

2279

'description': 'md5:873c81d308b979f0e23ee7e620b312a3',

2280

'uploader': 'Christopher Sykes',

2281

'uploader_id': 'ChristopherJSykes',

2282

'age_limit': 0,

2283

'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],

2284

'channel_id': 'UCCeo--lls1vna5YJABWAcVA',

2285

'playable_in_embed': True,

2286

'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',

2287

'like_count': int,

2288

'comment_count': int,

2289

'channel': 'Christopher Sykes',

2290

'live_status': 'not_live',

2291

'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',

2292

'availability': 'public',

2293

'duration': 195,

2294

'view_count': int,

2295

'categories': ['Science & Technology'],

2296

'channel_follower_count': int,

2297

'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',

2298

},

2299

'params': {

2300

'skip_download': True,

}

},

]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected outright; every other URL is judged by the parent class.
    """
    # Function-scope import, kept exactly as the original block has it
    from ..utils import parse_qs

    list_param = parse_qs(url).get('list', [None])[0]
    return False if list_param else super().suitable(url)

2313

2314

def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _player_cache: written by _cached (cache id tuple -> result or exception)
    # _code_cache: written by _load_player (player id -> downloaded JS source)
    self._code_cache, self._player_cache = {}, {}

2318

2319

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):

2320

lock = threading.Lock()

2321

2322

is_live = True

2323

start_time = time.time()

2324

formats = [f for f in formats if f.get('is_from_start')]

2325

2326

def refetch_manifest(format_id, delay):

2327

nonlocal formats, start_time, is_live

2328

if time.time() <= start_time + delay:

2329

return

2330

2331

_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

2332

video_details = traverse_obj(

2333

prs, (..., 'videoDetails'), expected_type=dict, default=[])

2334

microformats = traverse_obj(

2335

prs, (..., 'microformat', 'playerMicroformatRenderer'),

2336

expected_type=dict, default=[])

2337

_, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)

2338

start_time = time.time()

2339

2340

def mpd_feed(format_id, delay):

2341

"""

2342

@returns (manifest_url, manifest_stream_number, is_live) or None

2343

"""

2344

with lock:

2345

refetch_manifest(format_id, delay)

2346

2347

f = next((f for f in formats if f['format_id'] == format_id), None)

2348

if not f:

2349

if not is_live:

2350

self.to_screen(f'{video_id}: Video is no longer live')

2351

else:

2352

self.report_warning(

2353

f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')

2354

return None

2355

return f['manifest_url'], f['manifest_stream_number'], is_live

for f in formats:

f['is_live'] = True

f['protocol'] = 'http_dash_segments_generator'

2360

f['fragments'] = functools.partial(

2361

self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)

2362

2363

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):

2364

FETCH_SPAN, MAX_DURATION = 5, 432000

2365

2366

mpd_url, stream_number, is_live = None, None, True

2367

2368

begin_index = 0

2369

download_start_time = ctx.get('start') or time.time()

2370

2371

lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION

2372

if lack_early_segments:

2373

self.report_warning(bug_reports_message(

2374

'Starting download from the last 120 hours of the live stream since '

2375

'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

2376

lack_early_segments = True

2377

2378

known_idx, no_fragment_score, last_segment_url = begin_index, 0, None

2379

fragments, fragment_base_url = None, None

2380

2381

def _extract_sequence_from_mpd(refresh_sequence, immediate):

2382

nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url

2383

# Obtain from MPD's maximum seq value

2384

old_mpd_url = mpd_url

2385

last_error = ctx.pop('last_error', None)

2386

expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403

2387

mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)

2388

or (mpd_url, stream_number, False))

2389

if not refresh_sequence:

2390

if expire_fast and not is_live:

2391

return False, last_seq

2392

elif old_mpd_url == mpd_url:

2393

return True, last_seq

2394

try:

2395

fmts, _ = self._extract_mpd_formats_and_subtitles(

2396

mpd_url, None, note=False, errnote=False, fatal=False)

2397

except ExtractorError:

2398

fmts = None

2399

if not fmts:

2400

no_fragment_score += 2

2401

return False, last_seq

2402

fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)

2403

fragments = fmt_info['fragments']

2404

fragment_base_url = fmt_info['fragment_base_url']

2405

assert fragment_base_url

2406

2407

_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))

2408

return True, _last_seq

2409

2410

while is_live:

2411

fetch_time = time.time()

2412

if no_fragment_score > 30:

2413

return

2414

if last_segment_url:

2415

# Obtain from "X-Head-Seqnum" header value from each segment

2416

try:

2417

urlh = self._request_webpage(

2418

last_segment_url, None, note=False, errnote=False, fatal=False)

2419

except ExtractorError:

2420

urlh = None

2421

last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))

2422

if last_seq is None:

2423

no_fragment_score += 2

2424

last_segment_url = None

2425

continue

2426

else:

2427

should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)

2428

no_fragment_score += 2

2429

if not should_continue:

2430

continue

2431

2432

if known_idx > last_seq:

2433

last_segment_url = None

continue

last_seq += 1

if begin_index < 0 and known_idx < 0:

2439

# skip from the start when it's negative value

2440

known_idx = last_seq + begin_index

2441

if lack_early_segments:

2442

known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))

2443

try:

2444

for idx in range(known_idx, last_seq):

2445

# do not update sequence here or you'll get skipped some part of it

2446

should_continue, _ = _extract_sequence_from_mpd(False, False)

2447

if not should_continue:

2448

known_idx = idx - 1

2449

raise ExtractorError('breaking out of outer loop')

2450

last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)

2451

yield {

2452

'url': last_segment_url,

2453

'fragment_count': last_seq,

2454

}

2455

if known_idx == last_seq:

2456

no_fragment_score += 5

2457

else:

2458

no_fragment_score = 0

2459

known_idx = last_seq

2460

except ExtractorError:

2461

continue

2462

2463

time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2464

2465

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in *ytcfgs*, or None.

    Looks up ``PLAYER_JS_URL`` and then ``WEB_PLAYER_CONTEXT_CONFIGS/*/jsUrl``
    in the given configs and joins the first string hit onto
    https://www.youtube.com. *webpage* is accepted but not used here.
    """
    candidate = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    if candidate:
        return urljoin('https://www.youtube.com', candidate)
    return None

2472

2473

def _download_player_url(self, video_id, fatal=False):

2474

res = self._download_webpage(

2475

'https://www.youtube.com/iframe_api',

2476

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2477

if res:

2478

player_version = self._search_regex(

2479

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2480

if player_version:

2481

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2482

2483

def _signature_cache_id(self, example_sig):

2484

""" Return a string representation of a signature """

2485

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2486

2487

@classmethod

2488

def _extract_player_info(cls, player_url):

2489

for player_re in cls._PLAYER_INFO_RE:

2490

id_m = re.search(player_re, player_url)

if id_m:

break

else:

raise ExtractorError('Cannot identify player %r' % player_url)

2495

return id_m.group('id')

2496

2497

def _load_player(self, video_id, player_url, fatal=True):

2498

player_id = self._extract_player_info(player_url)

2499

if player_id not in self._code_cache:

2500

code = self._download_webpage(

2501

player_url, video_id, fatal=fatal,

2502

note='Downloading player ' + player_id,

2503

errnote='Download of %s failed' % player_url)

2504

if code:

2505

self._code_cache[player_id] = code

2506

return self._code_cache.get(player_id)

2507

2508

def _extract_signature_function(self, video_id, player_url, example_sig):

2509

player_id = self._extract_player_info(player_url)

2510

2511

# Read from filesystem cache

2512

func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'

2513

assert os.path.basename(func_id) == func_id

2514

2515

self.write_debug(f'Extracting signature function {func_id}')

2516

cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None

2517

2518

if not cache_spec:

2519

code = self._load_player(video_id, player_url)

2520

if code:

2521

res = self._parse_sig_js(code)

2522

test_string = ''.join(map(chr, range(len(example_sig))))

2523

cache_spec = [ord(c) for c in res(test_string)]

2524

self.cache.store('youtube-sigfuncs', func_id, cache_spec)

2525

2526

return lambda s: ''.join(s[i] for i in cache_spec)

2527

2528

def _print_sig_code(self, func, example_sig):

2529

if not self.get_param('youtube_print_sig_code'):

2530

return

2531

2532

def gen_sig_code(idxs):

2533

def _genslice(start, end, step):

2534

starts = '' if start == 0 else str(start)

2535

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2536

steps = '' if step == 1 else (':%d' % step)

2537

return f's[{starts}{ends}{steps}]'

2538

2539

step = None

2540

# Quelch pyflakes warnings - start will be set when step is set

2541

start = '(Never used)'

2542

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2547

step = None

2548

continue

2549

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2559

2560

test_string = ''.join(map(chr, range(len(example_sig))))

2561

cache_res = func(test_string)

2562

cache_spec = [ord(c) for c in cache_res]

2563

expr_code = ' + '.join(gen_sig_code(cache_spec))

2564

signature_id_tuple = '(%s)' % (

2565

', '.join(str(len(p)) for p in example_sig.split('.')))

2566

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2567

' return %s\n') % (signature_id_tuple, expr_code)

2568

self.to_screen('Extracted signature function:\n' + code)

2569

2570

def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it for Python.

    @param jscode  source text of the player JS
    @returns       a callable taking the scrambled signature string and
                   returning whatever the extracted JS function returns

    The function name is found with the first of the patterns below that
    matches (named group ``sig``).
    """
    # Literal parentheses in these patterns must be written \( and \).
    # A bare '$' outside a character class is an end-of-string anchor, and a
    # pattern that continues after it can never match.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a single list
    return lambda s: initial_function([s])

2593

2594

def _cached(self, func, *cache_id):

2595

def inner(*args, **kwargs):

2596

if cache_id not in self._player_cache:

2597

try:

2598

self._player_cache[cache_id] = func(*args, **kwargs)

2599

except ExtractorError as e:

2600

self._player_cache[cache_id] = e

2601

except Exception as e:

2602

self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)

2603

2604

ret = self._player_cache[cache_id]

2605

if isinstance(ret, Exception):

raise ret

return ret

return inner

def _decrypt_signature(self, s, video_id, player_url):

2611

"""Turn the encrypted s field into a working signature"""

2612

extract_sig = self._cached(

2613

self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))

2614

func = extract_sig(video_id, player_url, s)

2615

self._print_sig_code(func, s)

2616

return func(s)

2617

2618

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        # The built function is cached per player URL
        build_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        nsig_func = build_nsig(jsi, func_code)
        ret = nsig_func(s)
    except JSInterpreter.Exception as e:
        # Native interpretation failed: fall back to PhantomJS, and re-raise
        # the original error when the wrapper cannot be created
        try:
            phantom = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e)

        args, func_body = func_code
        script = f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));'
        ret = phantom.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret

2648

2649

def _extract_n_function_name(self, jscode):

2650

funcname, idx = self._search_regex(

2651

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

2652

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

if not idx:

return funcname

return json.loads(js_to_json(self._search_regex(

2657

rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,

2658

f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]

2659

2660

def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's n function.

    *func_code* comes from the 'youtube-nsig' cache when an entry newer than
    2022.08.19 exists. Otherwise the player JS is loaded, the function code
    is extracted from it and stored in that cache.
    """
    player_id = self._extract_player_info(player_url)

    cached_code = self.cache.load('youtube-nsig', player_id, after='2022.08.19')
    if cached_code:
        # On a cache hit the interpreter is built from the cached value itself
        return JSInterpreter(cached_code), player_id, cached_code

    jscode = self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)
    func_code = jsi.extract_function_code(self._extract_n_function_name(jscode))
    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code

2672

2673

def _extract_n_function_from_code(self, jsi, func_code):

2674

func = jsi.extract_function_from_code(*func_code)

def extract_nsig(s):

try:

ret = func([s])

except JSInterpreter.Exception:

2680

raise

2681

except Exception as e:

2682

raise JSInterpreter.Exception(traceback.format_exc(), cause=e)

2683

2684

if ret.startswith('enhanced_except_'):

2685

raise JSInterpreter.Exception('Signature function returned an exception')

return ret

return extract_nsig

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):

2691

"""

2692

Extract signatureTimestamp (sts)

2693

Required to tell API what sig/player version is in use.

2694

"""

2695

sts = None

2696

if isinstance(ytcfg, dict):

2697

sts = int_or_none(ytcfg.get('STS'))

2698

2699

if not sts:

2700

# Attempt to extract from player

2701

if player_url is None:

2702

error_msg = 'Cannot extract signature timestamp without player_url.'

2703

if fatal:

2704

raise ExtractorError(error_msg)

2705

self.report_warning(error_msg)

2706

return

2707

code = self._load_player(video_id, player_url, fatal=fatal)

2708

if code:

2709

sts = int_or_none(self._search_regex(

2710

r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,

2711

'JS player signature timestamp', group='sts', fatal=fatal))

2712

return sts

2713

2714

def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URLs found in *player_responses*.

    Two URLs are handled in order: ``videostatsPlaybackUrl`` and then
    ``videostatsWatchtimeUrl`` (the "fully watched" one). As soon as one of
    them is missing, a warning is reported and the method stops.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        reported_len = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_len) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = video_length
            qs['et'] = video_length

        tracking_url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            tracking_url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

2754

2755

@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url results for the YouTube videos referenced by *webpage*.

    An alternate link to a YouTube watch page short-circuits everything
    else: it is yielded and extraction is stopped. Otherwise the parent
    class's results come first, followed by lazyYT embeds and embeds of the
    Wordpress "YouTube Video Importer" plugin.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate is not None:
        yield cls.url_result(alternate.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for lazy in re.finditer(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        raw_id = lazy.group(1)
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    # (verbose pattern: whitespace inside the literal is not significant)
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for embed in re.finditer(importer_re, webpage):
        # Group 3 is the video id; groups 1 and 2 are the quote characters
        embedded_id = embed.group(3)
        yield cls.url_result(embedded_id, cls, embedded_id)

2778

2779

@classmethod
def extract_id(cls, url):
    """Return the video id ``cls.get_temp_id`` finds in *url*.

    Raises ExtractorError when no id is found.
    """
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

2785

2786

def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in *data*.

    Start times are read from ``timeRangeStartMillis`` and scaled by 1000;
    titles come from the chapter's ``title/simpleText``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_seconds(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapter_list = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)

2800

2801

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Extract chapters from the macro-marker lists of the engagement panels.

    Each marker list is tried in order; the first one that produces a
    non-empty chapter list is returned. Returns ``[]`` when none does.
    """
    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def start_of(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def title_of(chapter):
        return self._get_text(chapter, 'title')

    for contents in marker_lists:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, start_of, title_of, duration)
        if chapters:
            return chapters
    return []

2813

2814

def _extract_chapters_from_description(self, description, duration):
    """Extract chapters from timestamp-prefixed lines of ``description``.

    Matching lines are passed to ``_extract_chapters`` with ``strict=False``.
    A missing description is treated as an empty string.
    """
    timestamp_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')

    def start_of(match):
        return parse_duration(match[0])

    def title_of(match):
        return match[1]

    return self._extract_chapters(
        timestamp_lines, chapter_time=start_of, chapter_title=title_of,
        duration=duration, strict=False)

2819

2820

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

2825

'title': chapter_title(chapter),

2826

} for chapter in chapter_list or []]

2827

if not strict:

2828

chapter_list.sort(key=lambda c: c['start_time'] or 0)

2829

2830

chapters = [{'start_time': 0}]

2831

for idx, chapter in enumerate(chapter_list):

2832

if chapter['start_time'] is None:

2833

self.report_warning(f'Incomplete chapter {idx}')

2834

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

2835

chapters.append(chapter)

2836

else:

2837

self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')

2838

return chapters[1:]

2839

2840

def _extract_comment(self, comment_renderer, parent=None):
    """Flatten a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. ``parent`` is stored
    under 'parent'; a falsy value is recorded as 'root'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    body = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author_name = self._get_text(comment_renderer, 'authorText')
    channel_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    # Two possible locations for the like counter, tried in order
    vote_getters = (
        lambda x: x['voteCount']['simpleText'],
        lambda x: x['likeCount'],
    )
    like_count = parse_count(try_get(comment_renderer, vote_getters, str)) or 0
    avatar_url = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)

    by_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    hearted = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': body,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': like_count,
        'is_favorited': hearted,
        'author': author_name,
        'author_id': channel_id,
        'author_thumbnail': avatar_url,
        'author_is_uploader': by_uploader,
        'parent': parent or 'root'
    }

2874

2875

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following continuations page by page.

    Called without ``parent`` for the top-level comment section, and
    recursively (with ``parent`` set to a comment id and the shared
    ``tracker``) for each reply thread. ``tracker`` is a dict of running
    counters shared across the recursive calls.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Return the continuation for the requested sort order, if any."""
        _continuation = None
        for content in contents:
            header = traverse_obj(content, 'commentsHeaderRenderer')
            expected_total = self._get_count(header, 'countText', 'commentsCount')

            if expected_total:
                tracker['est_total'] = expected_total
                self.to_screen(f'Downloading ~{expected_total} comments')
            sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                header,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][sort_index], dict) or {}
            sort_endpoint = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = (
                self._extract_continuation_ep_data(sort_endpoint)
                or self._extract_continuation(sort_menu_item))
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield comments (and their replies) found in one page of items.

        A bare ``yield`` (None) is emitted once the parent-comment limit is
        reached; the consumer below treats a falsy entry as "stop".
        """
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if replies_renderer:
                tracker['current_page_thread'] += 1
                reply_entries = self._comment_entries(
                    replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                reply_budget = min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))
                yield from itertools.islice(reply_entries, reply_budget)

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Pad with empty strings so that missing limits fall back to sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first response only carries the header / sort menu
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

3020

3021

@staticmethod

3022

def _generate_comment_continuation(video_id):

3023

"""

3024

Generates initial comment section continuation token from given video id

3025

"""

3026

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

3027

return base64.b64encode(token.encode()).decode()

3028

3029

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""

    def _real_comment_extract(section_contents):
        # Pick the first item section identified as the comment section
        # (None when there is none) and stream its comments.
        comment_section = None
        for item in traverse_obj(section_contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                comment_section = item
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    # First value of the max_comments extractor argument caps the total
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, limit)

3039

3040

@staticmethod

3041

def _get_checkok_params():

3042

return {'contentCheckOk': True, 'racyCheckOk': True}

3043

3044

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback context part of a player request.

    ``sts`` is included as 'signatureTimestamp' only when it is not None.
    The flags from ``_get_checkok_params`` are merged into the result.
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context

@staticmethod
def _is_agegated(player_response):
    """Tell whether ``player_response`` describes an age-gated video.

    True if the playability status has a 'desktopLegacyAgeGateReason', or if
    its status/reason contains one of the known age-gate markers.
    """
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )

    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for marker in AGE_GATE_REASONS:
        for reason in reasons:
            if marker in reason:
                return True
    return False

3069

3070

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3073

3074

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the 'player' endpoint for ``video_id`` as ``client``.

    The signature timestamp is only looked up when ``player_url`` is given.
    Returns the response, or None when it is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB'  # enable stories
    }
    yt_query.update(self._generate_player_context(sts))

    readable_client = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % readable_client
    )
    return response or None

3093

3094

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the ordered, de-duplicated list of player clients to query.

    Clients come from the 'player_client' extractor argument. Besides names
    of user-selectable INNERTUBE_CLIENTS entries (those not starting with
    '_'), the values 'default' and 'all' are understood; anything else is
    skipped with a warning. When nothing valid was requested the default
    clients are used. For music URLs, the '<client>_music' variant of every
    selected client is added when such a client exists.
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that later extension never touches the defaults list
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Materialise the variants first: extending a list from a generator
        # that iterates over that same list would also visit the newly
        # appended names.
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3117

3118

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for ``video_id`` from the given clients.

    Clients are tried in the order given; fallback clients may be queued
    while iterating (for unplayable or age-gated responses). Returns a
    ``(player_responses, player_url)`` tuple. If every request failed, the
    last ExtractorError is re-raised; otherwise errors are reported as
    warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    seen_clients = set(clients)
    # Reversed so that clients.pop() hands them out in the requested order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        # NOTE(review): nesting of the `return` reconstructed from
        # whitespace-stripped source - confirm against the repository.
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client not in INNERTUBE_CLIENTS:
                continue
            if actual_client not in seen_clients:
                clients.append(client_name)
                seen_clients.add(actual_client)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error may become fatal; report earlier ones
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # NOTE(review): placed at loop level (not under `if pr:`) when
        # restoring indentation - confirm against the repository.
        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3199

3200

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
    """Generate format dicts for ``video_id``, then one final subtitles dict.

    Direct (https) formats from every streaming data entry are yielded
    first, followed by HLS and DASH manifest formats unless skipped. The
    very last item yielded is the merged subtitles dict, so callers must
    split it off from the formats.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an 'audioQuality' key that is present but None
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # Never hand None to parse_qs: a format with neither 'url' nor
            # 'signatureCipher' must simply be skipped below.
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher') or '')
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} to workaround the issue\n'
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                    f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality may still be None here; fall back to an empty note part
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Work on a copy in case the helper hands back a shared list
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format id and quality to ``f``; return False for duplicates."""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Borrow the quality of the known format with the closest height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
    yield subtitles

def _extract_storyboard(self, player_responses, duration):
    """Generate one 'mhtml' storyboard format per level of the storyboard spec.

    The spec is a '|'-separated string whose first field is the base URL
    and whose remaining fields are '#'-separated level descriptions.
    Nothing is generated when no usable base URL is found; malformed levels
    are reported and skipped.
    """
    raw_spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    # Reversed so the base URL can be popped off the end
    levels = raw_spec.split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', levels.pop() or None))
    if not base_url:
        return

    highest_level = len(levels) - 1
    for i, level_spec in enumerate(levels):
        fields = level_spec.split('#')
        counts = [int_or_none(field) for field in fields[:5]]
        if len(fields) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        name, sigh = fields[6:]

        url = base_url.replace('$L', str(highest_level - i)).replace('$N', name) + f'&sigh={sigh}'
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count

        fragments = []
        for j in range(math.ceil(fragment_count)):
            fragments.append({
                'url': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            })

        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': fragments,
        }

3427

3428

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    Returns ``(webpage, master_ytcfg, player_responses, player_url)``;
    ``webpage`` is None when 'webpage' is listed in the 'player_skip'
    extractor argument.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None
    if not skip_webpage:
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3441

3442

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Gather live status, streaming data, formats and subtitles.

    Returns ``(live_broadcast_details, is_live, streaming_data, formats,
    subtitles)``. ``is_live`` is taken from the video details and falls back
    to the live broadcast details only when it is None there.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    extracted = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, is_live, duration)
    # The generator's final item is the subtitles dict; the rest are formats
    *formats, subtitles = extracted

    return live_broadcast_details, is_live, streaming_data, formats, subtitles

3452

3453

def _real_extract(self, url):

3454

url, smuggled_data = unsmuggle_url(url, {})

3455

video_id = self._match_id(url)

3456

3457

base_url = self.http_scheme() + '//www.youtube.com/'

3458

webpage_url = base_url + 'watch?v=' + video_id

3459

3460

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3461

3462

playability_statuses = traverse_obj(

3463

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3464

3465

trailer_video_id = get_first(

3466

playability_statuses,

3467

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3468

expected_type=str)

3469

if trailer_video_id:

3470

return self.url_result(

3471

trailer_video_id, self.ie_key(), trailer_video_id)

3472

3473

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3474

if webpage else (lambda x: None))

3475

3476

video_details = traverse_obj(

3477

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3478

microformats = traverse_obj(

3479

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3480

expected_type=dict, default=[])

3481

video_title = (

3482

get_first(video_details, 'title')

3483

or self._get_text(microformats, (..., 'title'))

3484

or search_meta(['og:title', 'twitter:title', 'title']))

3485

video_description = get_first(video_details, 'shortDescription')

3486

3487

multifeed_metadata_list = get_first(

3488

player_responses,

3489

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3490

expected_type=str)

3491

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3492

if self.get_param('noplaylist'):

3493

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3498

# Unquote should take place before split on comma (,) since textual

3499

# fields may contain comma as well (see

3500

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3501

feed_data = urllib.parse.parse_qs(

3502

urllib.parse.unquote_plus(feed))

3503

3504

def feed_entry(name):

3505

return try_get(

3506

feed_data, lambda x: x[name][0], str)

3507

3508

feed_id = feed_entry('id')

3509

if not feed_id:

3510

continue

3511

feed_title = feed_entry('title')

3512

title = video_title

3513

if feed_title:

3514

title += ' (%s)' % feed_title

3515

entries.append({

3516

'_type': 'url_transparent',

3517

'ie_key': 'Youtube',

3518

'url': smuggle_url(

3519

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3520

{'force_singlefeed': True}),

3521

'title': title,

3522

})

3523

feed_ids.append(feed_id)

3524

self.to_screen(

3525

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3526

% (', '.join(feed_ids), video_id))

3527

return self.playlist_result(

3528

entries, video_id, video_title, video_description)

3529

3530

duration = int_or_none(

3531

get_first(video_details, 'lengthSeconds')

3532

or get_first(microformats, 'lengthSeconds')

3533

or parse_duration(search_meta('duration'))) or None

3534

3535

live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \

3536

self._list_formats(video_id, microformats, video_details, player_responses, player_url)

3537

3538

if not formats:

3539

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3540

self.report_drm(video_id)

3541

pemr = get_first(

3542

playability_statuses,

3543

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3544

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3545

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3546

if subreason:

3547

if subreason == 'The uploader has not made this video available in your country.':

3548

countries = get_first(microformats, 'availableCountries')

3549

if not countries:

3550

regions_allowed = search_meta('regionsAllowed')

3551

countries = regions_allowed.split(',') if regions_allowed else None

3552

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3553

reason += f'. {subreason}'

3554

if reason:

3555

self.raise_no_formats(reason, expected=True)

3556

3557

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3558

if not keywords and webpage:

3559

keywords = [

3560

unescapeHTML(m.group('content'))

3561

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3562

for keyword in keywords:

3563

if keyword.startswith('yt:stretch='):

3564

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3565

if mobj:

3566

# NB: float is intentional for forcing float division

3567

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3572

f['stretched_ratio'] = ratio

3573

break

3574

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3575

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3576

if thumbnail_url:

3577

thumbnails.append({

3578

'url': thumbnail_url,

3579

})

3580

original_thumbnails = thumbnails.copy()

3581

3582

# The best resolution thumbnails sometimes does not appear in the webpage

3583

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3584

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3585

thumbnail_names = [

3586

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

3587

# in resolution, these are not the custom thumbnail. So de-prioritize them

3588

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3589

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3590

]

3591

n_thumbnail_names = len(thumbnail_names)

3592

thumbnails.extend({

3593

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3594

video_id=video_id, name=name, ext=ext,

3595

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3596

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3597

for thumb in thumbnails:

3598

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3599

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3600

self._remove_duplicate_formats(thumbnails)

3601

self._downloader._sort_thumbnails(original_thumbnails)

3602

3603

category = get_first(microformats, 'category') or search_meta('genre')

3604

channel_id = str_or_none(

3605

get_first(video_details, 'channelId')

3606

or get_first(microformats, 'externalChannelId')

3607

or search_meta('channelId'))

3608

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3609

3610

live_content = get_first(video_details, 'isLiveContent')

3611

is_upcoming = get_first(video_details, 'isUpcoming')

3612

if is_live is None:

3613

if is_upcoming or live_content is False:

3614

is_live = False

3615

if is_upcoming is None and (live_content or is_live):

3616

is_upcoming = False

3617

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3618

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3619

if not duration and live_end_time and live_start_time:

3620

duration = live_end_time - live_start_time

3621

3622

if is_live and self.get_param('live_from_start'):

3623

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3624

3625

formats.extend(self._extract_storyboard(player_responses, duration))

3626

3627

# source_preference is lower for throttled/potentially damaged formats

3628

self._sort_formats(formats, (

3629

'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3634

'formats': formats,

3635

'thumbnails': thumbnails,

3636

# The best thumbnail that we are sure exists. Prevents unnecessary

3637

# URL checking if user don't care about getting the best possible thumbnail

3638

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3639

'description': video_description,

3640

'uploader': get_first(video_details, 'author'),

3641

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3642

'uploader_url': owner_profile_url,

3643

'channel_id': channel_id,

3644

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

3645

'duration': duration,

3646

'view_count': int_or_none(

3647

get_first((video_details, microformats), (..., 'viewCount'))

3648

or search_meta('interactionCount')),

3649

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3650

'age_limit': 18 if (

3651

get_first(microformats, 'isFamilySafe') is False

3652

or search_meta('isFamilyFriendly') == 'false'

3653

or search_meta('og:restrictions:age') == '18+') else 0,

3654

'webpage_url': webpage_url,

3655

'categories': [category] if category else None,

3656

'tags': keywords,

3657

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3658

'is_live': is_live,

3659

'was_live': (False if is_live or is_upcoming or live_content is False

3660

else None if is_live is None or is_upcoming is None

3661

else live_content),

3662

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3663

'release_timestamp': live_start_time,

3664

}

3665

3666

if get_first(video_details, 'isPostLiveDvr'):

3667

self.write_debug('Video is in Post-Live Manifestless mode')

3668

info['live_status'] = 'post_live'

3669

if (duration or 0) > 4 * 3600:

3670

self.report_warning(

3671

'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '

3672

'This is a known issue and patches are welcome')

3673

3674

subtitles = {}

3675

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3676

if pctr:

3677

def get_lang_code(track):

3678

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3679

or track.get('languageCode'))

3680

3681

# Converted into dicts to remove duplicates

3682

captions = {

3683

get_lang_code(sub): sub

3684

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3685

translation_languages = {

3686

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3687

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3688

3689

def process_language(container, base_url, lang_code, sub_name, query):

3690

lang_subs = container.setdefault(lang_code, [])

3691

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

# NB: Constructing the full subtitle dictionary is slow

3702

get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (

3703

self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

3704

for lang_code, caption_track in captions.items():

3705

base_url = caption_track.get('baseUrl')

3706

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3707

if not base_url:

3708

continue

3709

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3710

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3715

if not caption_track.get('isTranslatable'):

3716

continue

3717

for trans_code, trans_name in translation_languages.items():

3718

if not trans_code:

3719

continue

3720

orig_trans_code = trans_code

3721

if caption_track.get('kind') != 'asr':

3722

if not get_translated_subs:

3723

continue

3724

trans_code += f'-{lang_code}'

3725

trans_name += format_field(lang_name, None, ' from %s')

3726

# Add an "-orig" label to the original language so that it can be distinguished.

3727

# The subs are returned without "-orig" as well for compatibility

3728

if lang_code == f'a-{orig_trans_code}':

3729

process_language(

3730

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3731

# Setting tlang=lang returns damaged subtitles.

3732

process_language(automatic_captions, base_url, trans_code, trans_name,

3733

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3734

3735

info['automatic_captions'] = automatic_captions

3736

info['subtitles'] = subtitles

3737

3738

parsed_url = urllib.parse.urlparse(url)

3739

for component in [parsed_url.fragment, parsed_url.query]:

3740

query = urllib.parse.parse_qs(component)

3741

for k, v in query.items():

3742

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3743

d_k += '_time'

3744

if d_k not in info and k in s_ks:

3745

info[d_k] = parse_duration(query[k][0])

3746

3747

# Youtube Music Auto-generated description

3748

if video_description:

3749

mobj = re.search(

3750

r'''(?xs)

3751

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

3752

(?P<album>[^\n]+)

3753

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

3754

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

3755

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

3756

.+\nAuto-generated\ by\ YouTube\.\s*$

3757

''', video_description)

3758

if mobj:

3759

release_year = mobj.group('release_year')

3760

release_date = mobj.group('release_date')

3761

if release_date:

3762

release_date = release_date.replace('-', '')

3763

if not release_year:

3764

release_year = release_date[:4]

3765

info.update({

3766

'album': mobj.group('album'.strip()),

3767

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3768

'track': mobj.group('track').strip(),

3769

'release_date': release_date,

3770

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

3776

if not initial_data:

3777

query = {'videoId': video_id}

3778

query.update(self._get_checkok_params())

3779

initial_data = self._extract_response(

3780

item_id=video_id, ep='next', fatal=False,

3781

ytcfg=master_ytcfg, query=query,

3782

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3783

note='Downloading initial data API JSON')

3784

3785

info['comment_count'] = traverse_obj(initial_data, (

3786

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

3787

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

3788

), (

3789

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

3790

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

3791

), expected_type=int_or_none, get_all=False)

3792

3793

try: # This will error if there is no livechat

3794

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3795

except (KeyError, IndexError, TypeError):

3796

pass

3797

else:

3798

info.setdefault('subtitles', {})['live_chat'] = [{

3799

# url is needed to set cookies

3800

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

3801

'video_id': video_id,

3802

'ext': 'json',

3803

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3809

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3810

or self._extract_chapters_from_description(video_description, duration)

3811

or None)

3812

3813

contents = traverse_obj(

3814

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3815

expected_type=list, default=[])

3816

3817

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3818

if vpir:

3819

stl = vpir.get('superTitleLink')

3820

if stl:

3821

stl = self._get_text(stl)

3822

if try_get(

3823

vpir,

3824

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3825

info['location'] = stl

3826

else:

3827

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

3828

if mobj:

3829

info.update({

3830

'series': mobj.group(1),

3831

'season_number': int(mobj.group(2)),

3832

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3837

list) or []):

3838

tbr = tlb.get('toggleButtonRenderer') or {}

3839

for getter, regex in [(

3840

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3841

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3842

lambda x: x['accessibility'],

3843

lambda x: x['accessibilityData']['accessibilityData'],

3844

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3845

label = (try_get(tbr, getter, dict) or {}).get('label')

3846

if label:

3847

mobj = re.match(regex, label)

3848

if mobj:

3849

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3850

break

3851

sbr_tooltip = try_get(

3852

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3853

if sbr_tooltip:

3854

like_count, dislike_count = sbr_tooltip.split(' / ')

3855

info.update({

3856

'like_count': str_to_int(like_count),

3857

'dislike_count': str_to_int(dislike_count),

3858

})

3859

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3860

if vsir:

3861

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3862

info.update({

3863

'channel': self._get_text(vor, 'title'),

3864

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3869

list) or []

3870

multiple_songs = False

3871

for row in rows:

3872

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3873

multiple_songs = True

3874

break

3875

for row in rows:

3876

mrr = row.get('metadataRowRenderer') or {}

3877

mrr_title = mrr.get('title')

3878

if not mrr_title:

3879

continue

3880

mrr_title = self._get_text(mrr, 'title')

3881

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3882

if mrr_title == 'License':

3883

info['license'] = mrr_contents_text

3884

elif not multiple_songs:

3885

if mrr_title == 'Album':

3886

info['album'] = mrr_contents_text

3887

elif mrr_title == 'Artist':

3888

info['artist'] = mrr_contents_text

3889

elif mrr_title == 'Song':

3890

info['track'] = mrr_contents_text

3891

3892

fallbacks = {

3893

'channel': 'uploader',

3894

'channel_id': 'uploader_id',

3895

'channel_url': 'uploader_url',

3896

}

3897

3898

# The upload date for scheduled, live and past live streams / premieres in microformats

3899

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3900

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3901

upload_date = (

3902

unified_strdate(get_first(microformats, 'uploadDate'))

3903

or unified_strdate(search_meta('uploadDate')))

3904

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3905

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date

3906

info['upload_date'] = upload_date

3907

3908

for to, frm in fallbacks.items():

3909

if not info.get(to):

3910

info[to] = info.get(frm)

3911

3912

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3918

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3919

is_membersonly = None

3920

is_premium = None

3921

if initial_data and is_private is not None:

3922

is_membersonly = False

3923

is_premium = False

3924

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3925

badge_labels = set()

3926

for content in contents:

3927

if not isinstance(content, dict):

3928

continue

3929

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3930

for badge_label in badge_labels:

3931

if badge_label.lower() == 'members only':

3932

is_membersonly = True

3933

elif badge_label.lower() == 'premium':

3934

is_premium = True

3935

elif badge_label.lower() == 'unlisted':

3936

is_unlisted = True

3937

3938

info['availability'] = self._availability(

3939

is_private=is_private,

3940

needs_premium=is_premium,

3941

needs_subscription=is_membersonly,

3942

needs_auth=info['age_limit'] >= 18,

3943

is_unlisted=None if is_private is None else is_unlisted)

3944

3945

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3946

3947

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3953

3954

@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method that takes (self, url, smuggled_data).

    The returned wrapper takes (self, url). It unsmuggles the URL, sets
    'is_music_url' in the smuggled data when self.is_music_url(url) is
    truthy, calls func, and, when there is smuggled data and the result has
    entries, re-smuggles that data into the URL of every entry (lazily).
    """

    def _resmuggle_each(entries, data):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], data)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        # Nothing to pass through, or nothing to pass it through to
        if not smuggled_data or not result.get('entries'):
            return result
        result['entries'] = _resmuggle_each(result['entries'], smuggled_data)
        return result

    return wrapper

def _extract_channel_id(self, webpage):

3975

channel_id = self._html_search_meta(

3976

'channelId', webpage, 'channel id', default=None)

3977

if channel_id:

3978

return channel_id

3979

channel_url = self._html_search_meta(

3980

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

3981

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

3982

'twitter:app:url:googleplay'), webpage, 'channel url')

3983

return self._search_regex(

3984

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

3985

channel_url, 'channel id')

3986

3987

@staticmethod

3988

def _extract_basic_item_renderer(item):

3989

# Modified from _extract_grid_item_renderer

3990

known_basic_renderers = (

3991

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

3992

)

3993

for key, renderer in item.items():

3994

if not isinstance(renderer, dict):

3995

continue

3996

elif key in known_basic_renderers:

3997

return renderer

3998

elif key.startswith('grid') and key.endswith('Renderer'):

3999

return renderer

4000

4001

def _grid_entries(self, grid_renderer):

4002

for item in grid_renderer['items']:

4003

if not isinstance(item, dict):

4004

continue

4005

renderer = self._extract_basic_item_renderer(item)

4006

if not isinstance(renderer, dict):

4007

continue

4008

title = self._get_text(renderer, 'title')

4009

4010

# playlist

4011

playlist_id = renderer.get('playlistId')

4012

if playlist_id:

4013

yield self.url_result(

4014

'https://www.youtube.com/playlist?list=%s' % playlist_id,

4015

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

4020

if video_id:

4021

yield self._extract_video(renderer)

4022

continue

4023

# channel

4024

channel_id = renderer.get('channelId')

4025

if channel_id:

4026

yield self.url_result(

4027

'https://www.youtube.com/channel/%s' % channel_id,

4028

ie=YoutubeTabIE.ie_key(), video_title=title)

4029

continue

4030

# generic endpoint URL support

4031

ep_url = urljoin('https://www.youtube.com/', try_get(

4032

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

4033

str))

4034

if ep_url:

4035

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

4036

if ie.suitable(ep_url):

4037

yield self.url_result(

4038

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

4039

break

4040

4041

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for a list item renderer.

    Checked in order: the item's own video ID, a watch endpoint carrying a
    playlist ID (with or without a video ID), then a browse endpoint.
    Returns None when none of them is present.
    """

    def endpoint_value(*path):
        return traverse_obj(renderer, ('navigationEndpoint', *path))

    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = endpoint_value('watchEndpoint', 'playlistId')
    if playlist_id:
        video_id = endpoint_value('watchEndpoint', 'videoId')
        if video_id:
            target = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = endpoint_value('browseEndpoint', 'browseId')
    if browse_id:
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None

4058

4059

def _shelf_entries_from_content(self, shelf_renderer):

4060

content = shelf_renderer.get('content')

4061

if not isinstance(content, dict):

4062

return

4063

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

4064

if renderer:

4065

# TODO: add support for nested playlists so each shelf is processed

4066

# as separate playlist

4067

# TODO: this includes only first N items

4068

yield from self._grid_entries(renderer)

4069

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's endpoint, then its inline entries.

    When skip_channels is true and the shelf URL contains '/channels?',
    nothing at all is yielded for the shelf.
    """

    def _endpoint_url(shelf):
        return shelf['endpoint']['commandMetadata']['webCommandMetadata']['url']

    shelf_url = urljoin(
        'https://www.youtube.com', try_get(shelf_renderer, _endpoint_url, str))
    if shelf_url:
        # Links to other channels are skipped by URL substring, since checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

4089

4090

def _playlist_entries(self, video_list_renderer):

4091

for content in video_list_renderer['contents']:

4092

if not isinstance(content, dict):

4093

continue

4094

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

4095

if not isinstance(renderer, dict):

4096

continue

4097

video_id = renderer.get('videoId')

4098

if not video_id:

4099

continue

4100

yield self._extract_video(renderer)

4101

4102

def _rich_entries(self, rich_grid_renderer):
    """Yield the video under ['content']['videoRenderer'] if it has a video ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4109

4110

def _video_entry(self, video_renderer):

4111

video_id = video_renderer.get('videoId')

4112

if video_id:

4113

return self._extract_video(video_renderer)

4114

4115

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for the tile's tap-command URL, or None without one."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4121

4122

def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a backstage post.

    In order: the attached video, the attached playlist, then every link
    in the post text that YoutubeIE accepts, except a link to the attached
    video itself. Yields nothing when there is no 'backstagePostRenderer'.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video was already handled above
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)

4157

4158

def _post_thread_continuation_entries(self, post_thread_continuation):

4159

contents = post_thread_continuation.get('contents')

4160

if not isinstance(contents, list):

4161

return

4162

for content in contents:

4163

renderer = content.get('backstagePostThreadRenderer')

4164

if isinstance(renderer, dict):

4165

yield from self._post_thread_entries(renderer)

4166

continue

4167

renderer = content.get('videoRenderer')

4168

if isinstance(renderer, dict):

4169

yield self._video_entry(renderer)

4170

4171

r''' # unused

4172

def _rich_grid_entries(self, contents):

4173

for content in contents:

4174

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4175

if video_renderer:

4176

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under parent_renderer['contents'].

    continuation_list is used as an output parameter: it is reset to
    [None] and its single slot is overwritten in-place with whatever
    self._extract_continuation returns for the renderers processed.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            # Not a section: the only other shape handled is a rich item
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first recognised key of each section item is processed
            for key, renderer in section_item.items():
                if key in handlers:
                    yield from filter(None, handlers[key](renderer))
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4229

4230

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield the entries of a tab, requesting continuation pages until none is left.

    @param tab: tab renderer; its 'content' holds the first page of entries
    @param item_id: id used to label every continuation request
    @param ytcfg: forwarded to the API header/response helpers
    @param account_syncid: forwarded to generate_api_headers
    @param visitor_data: initial visitor data; replaced by the value found
                         in a response whenever one is present
    """
    # Single-slot holder that _extract_entries fills with the next continuation
    continuation_list = [None]

    def extract_entries(renderer):
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict)
        or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for responses that answer with 'continuationContents'
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for responses that answer with 'continuationItems':
    # (handler, key under which the handler expects the items)
    item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_numbers = itertools.count(1)
    while continuation:
        page_num = next(page_numbers)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        matched = next(
            ((key, value) for key, value in continuation_contents.items() if key in continuation_handlers),
            None)
        if matched:
            handler_key, continuation_renderer = matched
            # Clear the holder so that a stale continuation is never reused
            continuation_list[0] = None
            yield from continuation_handlers[handler_key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            # An empty renderer still falls through to the continuation items lookup below
            if continuation_renderer:
                continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        item_key = next((key for key in continuation_item if key in item_handlers), None)
        if item_key is None:
            # Nothing recognisable in this page: stop paging
            break
        handler, items_field = item_handlers[item_key]
        video_items_renderer = {items_field: continuation_items}
        continuation_list[0] = None
        yield from handler(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4307

for tab in tabs:

4308

renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}

4309

if renderer.get('selected') is True:

return renderer

else:

if fatal:

raise ExtractorError('Unable to find selected tab')

4314

4315

def _extract_uploader(self, data):
    """
    Build the uploader fields from the secondary playlist sidebar renderer.

    @param data: response
    @returns dict with any of 'uploader', 'uploader_id' and 'uploader_url';
             fields whose value is None are left out, and the dict is empty
             when the sidebar names no owner
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # "by X and N others" is reduced to X; any other text is used as is
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {name: value for name, value in fields.items() if value is not None}

4330

4331

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab of a channel/playlist page.

    @param item_id: fallback playlist id when the metadata supplies no channel id
    @param ytcfg: forwarded to the entry extraction
    @param data: response
    @param tabs: tab list; the selected one supplies the entries and the title suffix
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    # Channel metadata wins; playlist metadata is only consulted when it is absent
    channel_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    renderer = channel_renderer or try_get(
        data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    channel_name = channel_url = channel_id = None
    if channel_renderer:
        channel_name = channel_renderer.get('title')
        channel_url = channel_renderer.get('channelUrl')
        channel_id = channel_renderer.get('externalId')

    playlist_id = title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # Derive the uncropped variant from the first thumbnail, if there is one
        if not thumbnails:
            return
        uncropped_url = _get_uncropped(thumbnails[0]['url'])
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # The tab name (and its expanded text, if any) is always appended to the title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly just updated) uploader_* fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4422

4423

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of an inline (watch page) playlist, page by page.

    Each page is requested from the 'next' endpoint starting at the last
    video of the previous page. Videos up to and including the last one
    already yielded are skipped, and extraction stops as soon as a page
    brings nothing new.

    @param playlist: playlist renderer of the first page
    @param playlist_id: id sent as 'playlistId' and used to label requests
    @param data: response
    @param ytcfg: forwarded to the API header/response helpers
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video that was already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # try_get returns None when the last item is not a playlistPanelVideoRenderer;
        # fall back to an empty dict so the defaults below apply instead of crashing
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # The response carries no follow-up playlist: nothing more to page through
            return

4453

4454

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist embedded in a watch page.

    Playlists that link to a different, viewable playlist page are delegated
    to YoutubeTabIE; all others are extracted inline.

    @param item_id: fallback id when the playlist has no 'playlistId'
    @param url: URL being extracted; base for the playlist link
    @param data: response; fallback source for the title
    @param playlist: playlist renderer
    @param ytcfg: forwarded to the inline extraction
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    web_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, web_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4476

4477

def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its sidebar.

    Note: the playlist is only treated as public when YouTube says so explicitly
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
        if label:
            # The first selected entry that has a label counts as one more badge
            badge_labels.add(label.lower())
            break

    is_private = None
    is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'unlisted':
            is_unlisted = True
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'public':
            is_private = False
            is_unlisted = False
    return self._availability(is_private, False, False, False, is_unlisted)

4508

4509

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` found among the playlist sidebar items.

    @param data: response
    @param info_renderer: key to look up in every sidebar item
    @param expected_type: type the renderer must have to be accepted
    @returns the renderer, or None when no sidebar item provides one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with unavailable videos included.

    The browse id and params come from the 'show unavailable videos' menu
    button when it exists; otherwise built-in defaults are sent. Nothing is
    requested (None is returned) when the primary sidebar renderer is missing.

    @param item_id: playlist id; also used for the default browse id
    @param data: response
    @param ytcfg: forwarded to the API header/response helpers
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    browse_endpoint = {}
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4553

4554

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped

4557

4558

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and its initial data, retrying on transient failures.

    @param url: page to download
    @param item_id: id used to label the download
    @param fatal: passed to the retry manager and the error reporting helpers
    @returns (webpage, data); either may be None when the download failed
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            # Network errors are retried, except HTTP 403/429 which are reported right away
            retriable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, urllib.error.HTTPError) or cause.code not in (403, 429))
            if retriable:
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            # Setting the error asks the retry manager for another attempt
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):

4587

"""Use if failed to extract ytcfg (and data) from initial webpage"""

4588

if not ytcfg and self.is_authenticated:

4589

msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'

4590

if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:

4591

raise ExtractorError(

4592

f'{msg}. If you are not downloading private content, or '

4593

'your cookies are only for the first account and channel,'

4594

' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',

4595

expected=True)

4596

self.report_warning(msg, only_once=True)

4597

4598

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the initial data for `url`, from the webpage first and the API as fallback.

    @param url: page to extract
    @param item_id: id used to label the requests
    @param ytcfg: known ytcfg; extracted from the webpage when not given
    @param fatal: whether a home page redirect and API failures raise
    @param webpage_fatal: whether a failed webpage download raises
    @param default_client: client forwarded to the API fallback
    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        # No usable webpage data: resolve the URL through the API instead
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4617

4618

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the API and fetch the response of the endpoint it maps to.

    @param url: URL to resolve
    @param item_id: id used to label the requests
    @param ytcfg: forwarded to the API header/response helpers
    @param fatal: raise ExtractorError when the URL cannot be resolved; otherwise warn
    @param default_client: client used for both requests
    @returns the endpoint response; None when unresolved and not fatal
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # Endpoint key in the resolve response -> API endpoint to query, in order of preference
    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

4635

4636

# Default 'params' value used by _search_results when the caller passes none;
# a falsy value means no 'params' field is added to the search query
_SEARCH_PARAMS = None

4637

4638

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, requesting result pages until no continuation is left.

    @param query: search text
    @param params: value for the 'params' field of the request; defaults to _SEARCH_PARAMS
    @param default_client: client used for the ytcfg download and the API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query, 'params': params} if params else {'query': query}

    # Paths at which the result items may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-slot holder that _extract_entries fills with the next continuation
    continuation_list = [None]
    response = None
    for page_num in itertools.count(1):
        data.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(response)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        response = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(response, *content_keys)
        page = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page, continuation_list)
        if not continuation_list[0]:
            break

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4673

IE_DESC = 'YouTube Tabs'

4674

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4683

(?P<not_channel>

4684

feed/|hashtag/|

4685

(?:playlist|watch)\?.*?\blist=

4686

)|

4687

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4692

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4693

}

4694

IE_NAME = 'youtube:tab'

4695

4696

_TESTS = [{

4697

'note': 'playlists, multipage',

4698

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4699

'playlist_mincount': 94,

4700

'info_dict': {

4701

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4702

'title': 'Igor Kleiner - Playlists',

4703

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4704

'uploader': 'Igor Kleiner',

4705

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4706

'channel': 'Igor Kleiner',

4707

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4708

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4709

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4710

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4711

'channel_follower_count': int

4712

},

4713

}, {

4714

'note': 'playlists, multipage, different order',

4715

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4716

'playlist_mincount': 94,

4717

'info_dict': {

4718

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4719

'title': 'Igor Kleiner - Playlists',

4720

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4721

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4722

'uploader': 'Igor Kleiner',

4723

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4724

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4725

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4726

'channel': 'Igor Kleiner',

4727

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4728

'channel_follower_count': int

4729

},

4730

}, {

4731

'note': 'playlists, series',

4732

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4733

'playlist_mincount': 5,

4734

'info_dict': {

4735

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4736

'title': '3Blue1Brown - Playlists',

4737

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4738

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4739

'uploader': '3Blue1Brown',

4740

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4741

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4742

'channel': '3Blue1Brown',

4743

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4744

'tags': ['Mathematics'],

4745

'channel_follower_count': int

4746

},

4747

}, {

4748

'note': 'playlists, singlepage',

4749

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4750

'playlist_mincount': 4,

4751

'info_dict': {

4752

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4753

'title': 'ThirstForScience - Playlists',

4754

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4755

'uploader': 'ThirstForScience',

4756

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4757

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4758

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4759

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4760

'tags': 'count:13',

4761

'channel': 'ThirstForScience',

4762

'channel_follower_count': int

4763

}

4764

}, {

4765

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4766

'only_matching': True,

4767

}, {

4768

'note': 'basic, single video playlist',

4769

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4770

'info_dict': {

4771

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4772

'uploader': 'Sergey M.',

4773

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4774

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4779

'channel': 'Sergey M.',

4780

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4781

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4782

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4787

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4788

'info_dict': {

4789

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4790

'uploader': 'Sergey M.',

4791

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4792

'title': 'youtube-dl empty playlist',

4793

'tags': [],

4794

'channel': 'Sergey M.',

4795

'description': '',

4796

'modified_date': '20160902',

4797

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4798

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4799

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4805

'info_dict': {

4806

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4807

'title': 'lex will - Home',

4808

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4809

'uploader': 'lex will',

4810

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4811

'channel': 'lex will',

4812

'tags': ['bible', 'history', 'prophesy'],

4813

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4814

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4815

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4816

'channel_follower_count': int

4817

},

4818

'playlist_mincount': 2,

4819

}, {

4820

'note': 'Videos tab',

4821

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4822

'info_dict': {

4823

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4824

'title': 'lex will - Videos',

4825

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4826

'uploader': 'lex will',

4827

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4828

'tags': ['bible', 'history', 'prophesy'],

4829

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4830

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4831

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4832

'channel': 'lex will',

4833

'channel_follower_count': int

4834

},

4835

'playlist_mincount': 975,

4836

}, {

4837

'note': 'Videos tab, sorted by popular',

4838

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4839

'info_dict': {

4840

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4841

'title': 'lex will - Videos',

4842

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4843

'uploader': 'lex will',

4844

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4845

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4846

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4847

'channel': 'lex will',

4848

'tags': ['bible', 'history', 'prophesy'],

4849

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4850

'channel_follower_count': int

4851

},

4852

'playlist_mincount': 199,

4853

}, {

4854

'note': 'Playlists tab',

4855

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4856

'info_dict': {

4857

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4858

'title': 'lex will - Playlists',

4859

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4860

'uploader': 'lex will',

4861

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4862

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4863

'channel': 'lex will',

4864

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4865

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4866

'tags': ['bible', 'history', 'prophesy'],

4867

'channel_follower_count': int

4868

},

4869

'playlist_mincount': 17,

4870

}, {

4871

'note': 'Community tab',

4872

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4873

'info_dict': {

4874

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4875

'title': 'lex will - Community',

4876

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4877

'uploader': 'lex will',

4878

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4879

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4880

'channel': 'lex will',

4881

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4882

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4883

'tags': ['bible', 'history', 'prophesy'],

4884

'channel_follower_count': int

4885

},

4886

'playlist_mincount': 18,

4887

}, {

4888

'note': 'Channels tab',

4889

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4890

'info_dict': {

4891

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4892

'title': 'lex will - Channels',

4893

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4894

'uploader': 'lex will',

4895

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4896

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4897

'channel': 'lex will',

4898

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4899

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4900

'tags': ['bible', 'history', 'prophesy'],

4901

'channel_follower_count': int

4902

},

4903

'playlist_mincount': 12,

4904

}, {

4905

'note': 'Search tab',

4906

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4907

'playlist_mincount': 40,

4908

'info_dict': {

4909

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4910

'title': '3Blue1Brown - Search - linear algebra',

4911

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4912

'uploader': '3Blue1Brown',

4913

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4914

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4915

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4916

'tags': ['Mathematics'],

4917

'channel': '3Blue1Brown',

4918

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4919

'channel_follower_count': int

4920

},

4921

}, {

4922

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4923

'only_matching': True,

4924

}, {

4925

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4926

'only_matching': True,

4927

}, {

4928

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4929

'only_matching': True,

4930

}, {

4931

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4932

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4933

'info_dict': {

4934

'title': '29C3: Not my department',

4935

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4936

'uploader': 'Christiaan008',

4937

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4938

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4939

'tags': [],

4940

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4941

'view_count': int,

4942

'modified_date': '20150605',

4943

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4944

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4945

'channel': 'Christiaan008',

4946

},

4947

'playlist_count': 96,

4948

}, {

4949

'note': 'Large playlist',

4950

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4951

'info_dict': {

4952

'title': 'Uploads from Cauchemar',

4953

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4954

'uploader': 'Cauchemar',

4955

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4956

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4957

'tags': [],

4958

'modified_date': r're:\d{8}',

4959

'channel': 'Cauchemar',

4960

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4961

'view_count': int,

4962

'description': '',

4963

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4964

},

4965

'playlist_mincount': 1123,

4966

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4967

}, {

4968

'note': 'even larger playlist, 8832 videos',

4969

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4970

'only_matching': True,

4971

}, {

4972

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4973

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4974

'info_dict': {

4975

'title': 'Uploads from Interstellar Movie',

4976

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4977

'uploader': 'Interstellar Movie',

4978

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4979

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4980

'tags': [],

4981

'view_count': int,

4982

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4983

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4984

'channel': 'Interstellar Movie',

4985

'description': '',

4986

'modified_date': r're:\d{8}',

4987

},

4988

'playlist_mincount': 21,

4989

}, {

4990

'note': 'Playlist with "show unavailable videos" button',

4991

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4992

'info_dict': {

4993

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4994

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4995

'uploader': 'Phim Siêu Nhân Nhật Bản',

4996

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4997

'view_count': int,

4998

'channel': 'Phim Siêu Nhân Nhật Bản',

4999

'tags': [],

5000

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5001

'description': '',

5002

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

5003

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

5004

'modified_date': r're:\d{8}',

5005

},

5006

'playlist_mincount': 200,

5007

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5008

}, {

5009

'note': 'Playlist with unavailable videos in page 7',

5010

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

5011

'info_dict': {

5012

'title': 'Uploads from BlankTV',

5013

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

5014

'uploader': 'BlankTV',

5015

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5016

'channel': 'BlankTV',

5017

'channel_url': 'https://www.youtube.com/c/blanktv',

5018

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

5019

'view_count': int,

5020

'tags': [],

5021

'uploader_url': 'https://www.youtube.com/c/blanktv',

5022

'modified_date': r're:\d{8}',

5023

'description': '',

5024

},

5025

'playlist_mincount': 1000,

5026

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5027

}, {

5028

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

5029

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5030

'info_dict': {

5031

'title': 'Data Analysis with Dr Mike Pound',

5032

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5033

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5034

'uploader': 'Computerphile',

5035

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

5036

'uploader_url': 'https://www.youtube.com/user/Computerphile',

5037

'tags': [],

5038

'view_count': int,

5039

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5040

'channel_url': 'https://www.youtube.com/user/Computerphile',

5041

'channel': 'Computerphile',

5042

},

5043

'playlist_mincount': 11,

5044

}, {

5045

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5046

'only_matching': True,

5047

}, {

5048

'note': 'Playlist URL that does not actually serve a playlist',

5049

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

5054

'uploader': 'STREEM',

5055

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

5056

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

5057

'upload_date': '20150526',

5058

'license': 'Standard YouTube License',

5059

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

5060

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5067

},

5068

'skip': 'This video is not available.',

5069

'add_ie': [YoutubeIE.ie_key()],

5070

}, {

5071

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5072

'only_matching': True,

5073

}, {

5074

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5075

'only_matching': True,

5076

}, {

5077

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5078

'info_dict': {

5079

'id': 'Wq15eF5vCbI', # This will keep changing

5080

'ext': 'mp4',

5081

'title': str,

5082

'uploader': 'Sky News',

5083

'uploader_id': 'skynews',

5084

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5085

'upload_date': r're:\d{8}',

5086

'description': str,

5087

'categories': ['News & Politics'],

5088

'tags': list,

5089

'like_count': int,

5090

'release_timestamp': 1642502819,

5091

'channel': 'Sky News',

5092

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5093

'age_limit': 0,

5094

'view_count': int,

5095

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

5096

'playable_in_embed': True,

5097

'release_date': '20220118',

5098

'availability': 'public',

5099

'live_status': 'is_live',

5100

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5101

'channel_follower_count': int

5102

},

5103

'params': {

5104

'skip_download': True,

5105

},

5106

'expected_warnings': ['Ignoring subtitle tracks found in '],

5107

}, {

5108

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5113

'uploader': 'The Young Turks',

5114

'uploader_id': 'TheYoungTurks',

5115

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5116

'upload_date': '20150715',

5117

'license': 'Standard YouTube License',

5118

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5119

'categories': ['News & Politics'],

5120

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5125

},

5126

'only_matching': True,

5127

}, {

5128

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5129

'only_matching': True,

5130

}, {

5131

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5132

'only_matching': True,

5133

}, {

5134

'note': 'A channel that is not live. Should raise error',

5135

'url': 'https://www.youtube.com/user/numberphile/live',

5136

'only_matching': True,

5137

}, {

5138

'url': 'https://www.youtube.com/feed/trending',

5139

'only_matching': True,

5140

}, {

5141

'url': 'https://www.youtube.com/feed/library',

5142

'only_matching': True,

5143

}, {

5144

'url': 'https://www.youtube.com/feed/history',

5145

'only_matching': True,

5146

}, {

5147

'url': 'https://www.youtube.com/feed/subscriptions',

5148

'only_matching': True,

5149

}, {

5150

'url': 'https://www.youtube.com/feed/watch_later',

5151

'only_matching': True,

5152

}, {

5153

'note': 'Recommended - redirects to home page.',

5154

'url': 'https://www.youtube.com/feed/recommended',

5155

'only_matching': True,

5156

}, {

5157

'note': 'inline playlist with not always working continuations',

5158

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5159

'only_matching': True,

5160

}, {

5161

'url': 'https://www.youtube.com/course',

5162

'only_matching': True,

5163

}, {

5164

'url': 'https://www.youtube.com/zsecurity',

5165

'only_matching': True,

5166

}, {

5167

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5168

'only_matching': True,

5169

}, {

5170

'url': 'https://www.youtube.com/TheYoungTurks/live',

5171

'only_matching': True,

5172

}, {

5173

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5180

}, {

5181

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5182

'only_matching': True,

5183

}, {

5184

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5185

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5186

'only_matching': True

5187

}, {

5188

'note': '/browse/ should redirect to /channel/',

5189

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5190

'only_matching': True

5191

}, {

5192

'note': 'VLPL, should redirect to playlist?list=PL...',

5193

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5194

'info_dict': {

5195

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5196

'uploader': 'NoCopyrightSounds',

5197

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5198

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5199

'title': 'NCS : All Releases 💿',

5200

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5201

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5202

'modified_date': r're:\d{8}',

5203

'view_count': int,

5204

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5205

'tags': [],

5206

'channel': 'NoCopyrightSounds',

5207

},

5208

'playlist_mincount': 166,

5209

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5210

}, {

5211

'note': 'Topic, should redirect to playlist?list=UU...',

5212

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5213

'info_dict': {

5214

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5215

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5216

'title': 'Uploads from Royalty Free Music - Topic',

5217

'uploader': 'Royalty Free Music - Topic',

5218

'tags': [],

5219

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5220

'channel': 'Royalty Free Music - Topic',

5221

'view_count': int,

5222

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5223

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5224

'modified_date': r're:\d{8}',

5225

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5226

'description': '',

5227

},

5228

'expected_warnings': [

5229

'The URL does not have a videos tab',

5230

r'[Uu]navailable videos (are|will be) hidden',

5231

],

5232

'playlist_mincount': 101,

5233

}, {

5234

'note': 'Topic without a UU playlist',

5235

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5236

'info_dict': {

5237

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5238

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5239

'tags': [],

5240

},

5241

'expected_warnings': [

5242

'the playlist redirect gave error',

5243

],

5244

'playlist_mincount': 9,

5245

}, {

5246

'note': 'Youtube music Album',

5247

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5248

'info_dict': {

5249

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5250

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5255

'modified_date': r're:\d{8}',

5256

},

5257

'playlist_count': 50,

5258

}, {

5259

'note': 'unlisted single video playlist',

5260

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5261

'info_dict': {

5262

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5263

'uploader': 'colethedj',

5264

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5265

'title': 'yt-dlp unlisted playlist test',

5266

'availability': 'unlisted',

5267

'tags': [],

5268

'modified_date': '20220418',

5269

'channel': 'colethedj',

5270

'view_count': int,

5271

'description': '',

5272

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5273

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5274

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5279

'url': 'https://www.youtube.com/feed/recommended',

5280

'info_dict': {

5281

'id': 'recommended',

5282

'title': 'recommended',

5283

'tags': [],

5284

},

5285

'playlist_mincount': 50,

5286

'params': {

5287

'skip_download': True,

5288

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5289

},

5290

}, {

5291

'note': 'API Fallback: /videos tab, sorted by oldest first',

5292

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5293

'info_dict': {

5294

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5295

'title': 'Cody\'sLab - Videos',

5296

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5297

'uploader': 'Cody\'sLab',

5298

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5299

'channel': 'Cody\'sLab',

5300

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5301

'tags': [],

5302

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5303

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5304

'channel_follower_count': int

5305

},

5306

'playlist_mincount': 650,

5307

'params': {

5308

'skip_download': True,

5309

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5310

},

5311

}, {

5312

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5313

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5314

'info_dict': {

5315

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5316

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5317

'title': 'Uploads from Royalty Free Music - Topic',

5318

'uploader': 'Royalty Free Music - Topic',

5319

'modified_date': r're:\d{8}',

5320

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5321

'description': '',

5322

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5323

'tags': [],

5324

'channel': 'Royalty Free Music - Topic',

5325

'view_count': int,

5326

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5327

},

5328

'expected_warnings': [

5329

'does not have a videos tab',

5330

r'[Uu]navailable videos (are|will be) hidden',

5331

],

5332

'playlist_mincount': 101,

5333

'params': {

5334

'skip_download': True,

5335

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5336

},

5337

}, {

5338

'note': 'non-standard redirect to regional channel',

5339

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5340

'only_matching': True

5341

}, {

5342

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5343

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5344

'info_dict': {

5345

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5346

'modified_date': '20220407',

5347

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5348

'tags': [],

5349

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5350

'uploader': 'pukkandan',

5351

'availability': 'unlisted',

5352

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5353

'channel': 'pukkandan',

5354

'description': 'Test for collaborative playlist',

5355

'title': 'yt-dlp test - collaborative playlist',

5356

'view_count': int,

5357

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5358

},

5359

'playlist_mincount': 2

}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs accepted by YoutubeIE are never claimed here; every other URL is
    decided by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5365

5366

# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional "/<word>" path segment; the conditional group only attempts it
#          when the "not_channel" group (presumably defined inside _VALID_URL - confirm)
#          did not take part in the match
#   post - everything that remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5367

5368

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a channel/tab/playlist URL and dispatch to the matching extraction path.

    The URL is first forced onto the ``www.youtube.com`` host and split with
    ``_URL_RE``. It may then be rewritten (music URLs, channel home pages,
    ``watch`` URLs lacking a video id) before the page data is fetched and
    handed to tab, playlist or single-video handling.

    Returns the result of ``_extract_from_tabs``, ``_extract_from_playlist`` or
    a ``url_result`` pointing at another extractor.

    Raises:
        ExtractorError: if an album cannot be resolved to a playlist, if an
            explicitly requested tab does not exist, or if the page cannot be
            recognised as a tab, playlist or video.
        UserNotLive: if the ``live`` tab was requested but a tab page was returned.
    """
    item_id = self._match_id(url)
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Named groups of _URL_RE, with groups that did not match mapped to ''
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user actually asked for before any implicit /videos redirect
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if not original_tab_name:
                    # The tab was added implicitly above, so fall back instead of failing
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    # The user explicitly requested a tab that is not available
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')

5496

5497

5498

class YoutubePlaylistIE(InfoExtractor):
    """Accept playlist URLs or bare playlist IDs and delegate them to YoutubeTabIE."""

    IE_DESC = 'YouTube playlists'
    # Verbose-mode pattern: an optional youtube/youtubekids/invidious URL prefix
    # ending in "list=", followed by the playlist ID itself.
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        URLs that YoutubeTabIE accepts, and URLs carrying a non-empty ``v``
        query parameter, are rejected; anything else is decided by the
        inherited ``suitable`` implementation.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs comes from the module-level ..utils import
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* as a ``https://www.youtube.com/playlist`` URL and hand it to YoutubeTabIE.

        The original query string is carried over; when there is none (for
        example a bare playlist ID) ``list=<playlist_id>`` is used instead.
        Music URLs are flagged through smuggled data.
        """
        playlist_id = self._match_id(url)
        # Must be evaluated on the original URL, before it is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5607

5608

5609

class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` playlist parameter."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full ``watch`` URL (``v``, ``list`` and
        ``feature=youtu.be`` parameters) and delegate it to YoutubeTabIE."""
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5658

5659

5660

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` embeds onto the channel's ``/live`` URL."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the ``/channel/<id>/live`` URL of the embedded channel."""
        channel_id = self._match_id(url)
        return self.url_result(
            f'https://www.youtube.com/channel/{channel_id}/live',
            ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5673

5674

5675

class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into the user's ``/videos`` URL."""

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using ``https://www.youtube.com/user/<id>/videos``."""
        user_id = self._match_id(url)
        return self.url_result(
            f'https://www.youtube.com/user/{user_id}/videos',
            ie=YoutubeTabIE.ie_key(), video_id=user_id)

5689

5690

5691

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Expand the ``:ytfav`` keyword (and its spelling variants) into the ``LL`` playlist URL."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the fixed ``playlist?list=LL`` URL."""
        return self.url_result(
            'https://www.youtube.com/playlist?list=LL',
            ie=YoutubeTabIE.ie_key())

5708

5709

5710

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):

5711

IE_NAME = 'youtube:notif'

5712

IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'

5713

_VALID_URL = r':ytnotif(?:ication)?s?'

5714

_LOGIN_REQUIRED = True

5715

_TESTS = [{

5716

'url': ':ytnotif',

5717

'only_matching': True,

5718

}, {

5719

'url': ':ytnotifications',

5720

'only_matching': True,

5721

}]

5722

5723

def _extract_notification_menu(self, response, continuation_list):
    """Yield one entry per notification renderer found in *response*.

    The item list is read from either the popup menu section or the appended
    continuation items. ``continuation_list[0]`` is used as an output slot: it
    is reset to ``None`` and then set to the last ``continuationItemRenderer``
    seen among the items, if any.
    """
    notification_list = traverse_obj(
        response,
        ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
        ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
        expected_type=list) or []
    continuation_list[0] = None
    for item in notification_list:
        entry = self._extract_notification_renderer(item.get('notificationRenderer'))
        if entry:
            yield entry
        continuation = item.get('continuationItemRenderer')
        if continuation:
            continuation_list[0] = continuation

5737

5738

def _extract_notification_renderer(self, notification):

5739

video_id = traverse_obj(

5740

notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)

5741

url = f'https://www.youtube.com/watch?v={video_id}'

5742

channel_id = None

5743

if not video_id:

5744

browse_ep = traverse_obj(

5745

notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)

5746

channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)

5747

post_id = self._search_regex(

5748

r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),

5749

'post id', default=None)

5750

if not channel_id or not post_id:

5751

return

5752

# The direct /post url redirects to this in the browser

5753

url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

5754

5755

channel = traverse_obj(

5756

notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),

5757

expected_type=str)

5758

notification_title = self._get_text(notification, 'shortMessage')

5759

if notification_title:

5760

notification_title = notification_title.replace('\xad', '') # remove soft hyphens

5761

# TODO: handle recommended videos

5762

title = self._search_regex(

5763

rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,

5764

'video title', default=None)

5765

upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

5766

if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())

else None)

return {

'_type': 'url',

'url': url,

'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),

5772

'video_id': video_id,

5773

'title': title,

5774

'channel_id': channel_id,

5775

'channel': channel,

5776

'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),

5777

'upload_date': upload_date,

5778

}

5779

5780

def _notification_menu_entries(self, ytcfg):

5781

continuation_list = [None]

5782

response = None

5783

for page in itertools.count(1):

5784

ctoken = traverse_obj(

5785

continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)

5786

response = self._extract_response(

5787

item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,

5788

ep='notification/get_notification_menu', check_get_keys='actions',

5789

headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))

5790

yield from self._extract_notification_menu(response, continuation_list)

5791

if not continuation_list[0]:

5792

break

5793

5794

def _real_extract(self, url):

5795

display_id = 'notifications'

5796

ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}

5797

self._report_playlist_authcheck(ytcfg)

5798

return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)

5799

5800

5801

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` search key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [
        {
            'url': 'ytsearch5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` search key."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [
        {
            'url': 'ytsearchdate5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extractor for youtube.com/results and /search URLs carrying a query."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'python',
                'title': 'python',
            },
        },
        {
            'url': 'https://www.youtube.com/results?search_query=%23cats',
            'playlist_mincount': 1,
            'info_dict': {
                'id': '#cats',
                'title': '#cats',
                # The test suite does not have support for nested playlists
                # 'entries': [{
                #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                #     'title': '#cats',
                # }],
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Run the search named by ``search_query``/``q``, forwarding ``sp`` if present.

        The query string doubles as the id and the title of the playlist.
        """
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)

5870

5871

5872

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extractor for music.youtube.com/search URLs.

    A result section can be selected either with an ``sp`` query parameter or
    with a ``#section`` fragment naming one of the keys of ``_SECTIONS``.
    """

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music',
            'playlist_count': 16,
            'info_dict': {
                'id': 'royalty free music',
                'title': 'royalty free music',
            },
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - songs',
                'title': 'royalty free music - songs',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - community playlists',
                'title': 'royalty free music - community playlists',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
    ]

    # Section name (as written in the URL fragment) -> value sent as search params
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Search with the ``web_music`` client and return a playlist.

        The playlist id/title is the query, suffixed with " - <section>" when
        a section is known. An ``sp`` value not listed in ``_SECTIONS`` is
        used as the section label itself.
        """
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Reverse lookup: find the section name whose params match, else
            # fall back to showing the raw params value.
            section = params
            for section_name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = section_name
                    break
        else:
            # Text between the first and second '#', or '' when there is no fragment
            url_pieces = url.split('#')
            fragment = url_pieces[1] if len(url_pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)

5923

5924

5925

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    Subclasses are expected to override _FEED_NAME; it determines both
    IE_NAME and the /feed/ URL that extraction is redirected to.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # Borrow the login check from YoutubeBaseInfoExtractor, which is not a
        # base class of this extractor.
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        """Extractor name derived from the feed name."""
        return f'youtube:{cls._FEED_NAME}'

    def _real_extract(self, url):
        """Redirect to the feed page and let YoutubeTabIE extract it."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5943

5944

5945

class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword extractor mapping ":ytwatchlater" to the ``list=WL`` playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Hand the fixed "WL" playlist URL over to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5957

5958

5959

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ":ytrec" and the bare youtube.com home URL."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ":ytsubs" keyword."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ":ythis" / ":ythistory" keyword."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]

class YoutubeStoriesIE(InfoExtractor):
    """Extractor for the ``ytstories:UC...`` pseudo-URL prefix."""

    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    # Note that the captured id excludes the leading "UC"
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {
            'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the "RLTD"-prefixed playlist id and redirect to its playlist URL."""
        playlist_id = f'RLTD{self._match_id(url)}'
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        return self.url_result(playlist_url, ie=YoutubeTabIE, video_id=playlist_id)

6013

6014

6015

class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch/attribution URLs that end before any video id and reports an error."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''
    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError hinting at an unquoted URL."""
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(quoting_hint, expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extractor for youtube.com/clip/<id> URLs.

    The clip page is resolved to its base video; extraction of the video
    itself is delegated to YoutubeIE through a ``url_transparent`` result
    that carries the clip id and the clip's section boundaries.
    """

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Resolve a clip URL to its base video plus start/end offsets.

        Returns a ``url_transparent`` dict pointing at the watch URL of the
        base video, with ``section_start``/``section_end`` in seconds
        (the page reports them in milliseconds).

        Raises ExtractorError when the page data contains no video id, or no
        usable start/end time for the clip.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # The lookup yields nothing when the page layout differs; treat any
        # non-dict result as "no clip data" instead of crashing on subscript.
        if not isinstance(clip_data, dict):
            clip_data = {}
        start_ms = int_or_none(clip_data.get('startTimeMs'))
        end_ms = int_or_none(clip_data.get('endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start and end times')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }

class YoutubeTruncatedIDIE(InfoExtractor):
    """Matches watch URLs whose ``v`` value is only 1-10 characters long and reports an error."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        truncation_notice = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(truncation_notice, expected=True)