jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import hashlib
	6	import itertools
	7	import json
	8	import math
	9	import os.path
	10	import random
	11	import re
	12	import sys
	13	import threading
	14	import time
	15	import traceback
	16	import urllib.error
	17	import urllib.parse
	18
	19	from .common import InfoExtractor, SearchInfoExtractor
	20	from ..compat import functools
	21	from ..jsinterp import JSInterpreter
	22	from ..utils import (
	23	NO_DEFAULT,
	24	ExtractorError,
	25	bug_reports_message,
	26	classproperty,
	27	clean_html,
	28	datetime_from_str,
	29	dict_get,
	30	error_to_compat_str,
	31	float_or_none,
	32	format_field,
	33	get_first,
	34	int_or_none,
	35	is_html,
	36	join_nonempty,
	37	js_to_json,
	38	mimetype2ext,
	39	network_exceptions,
	40	orderedSet,
	41	parse_codecs,
	42	parse_count,
	43	parse_duration,
	44	parse_iso8601,
	45	parse_qs,
	46	qualities,
	47	remove_end,
	48	remove_start,
	49	smuggle_url,
	50	str_or_none,
	51	str_to_int,
	52	strftime_or_none,
	53	traverse_obj,
	54	try_get,
	55	unescapeHTML,
	56	unified_strdate,
	57	unified_timestamp,
	58	unsmuggle_url,
	59	update_url_query,
	60	url_or_none,
	61	urljoin,
	62	variadic,
	63	)
	64
	65	# any clients starting with _ cannot be explicitly requested by the user
	66	INNERTUBE_CLIENTS = {
	67	'web': {
	68	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	69	'INNERTUBE_CONTEXT': {
	70	'client': {
	71	'clientName': 'WEB',
	72	'clientVersion': '2.20211221.00.00',
	73	}
	74	},
	75	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	76	},
	77	'web_embedded': {
	78	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	79	'INNERTUBE_CONTEXT': {
	80	'client': {
	81	'clientName': 'WEB_EMBEDDED_PLAYER',
	82	'clientVersion': '1.20211215.00.01',
	83	},
	84	},
	85	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	86	},
	87	'web_music': {
	88	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	89	'INNERTUBE_HOST': 'music.youtube.com',
	90	'INNERTUBE_CONTEXT': {
	91	'client': {
	92	'clientName': 'WEB_REMIX',
	93	'clientVersion': '1.20211213.00.00',
	94	}
	95	},
	96	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	97	},
	98	'web_creator': {
	99	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	100	'INNERTUBE_CONTEXT': {
	101	'client': {
	102	'clientName': 'WEB_CREATOR',
	103	'clientVersion': '1.20211220.02.00',
	104	}
	105	},
	106	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	107	},
	108	'android': {
	109	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	110	'INNERTUBE_CONTEXT': {
	111	'client': {
	112	'clientName': 'ANDROID',
	113	'clientVersion': '16.49',
	114	}
	115	},
	116	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	117	'REQUIRE_JS_PLAYER': False
	118	},
	119	'android_embedded': {
	120	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	121	'INNERTUBE_CONTEXT': {
	122	'client': {
	123	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	124	'clientVersion': '16.49',
	125	},
	126	},
	127	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	128	'REQUIRE_JS_PLAYER': False
	129	},
	130	'android_music': {
	131	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	132	'INNERTUBE_CONTEXT': {
	133	'client': {
	134	'clientName': 'ANDROID_MUSIC',
	135	'clientVersion': '4.57',
	136	}
	137	},
	138	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	139	'REQUIRE_JS_PLAYER': False
	140	},
	141	'android_creator': {
	142	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	143	'INNERTUBE_CONTEXT': {
	144	'client': {
	145	'clientName': 'ANDROID_CREATOR',
	146	'clientVersion': '21.47',
	147	},
	148	},
	149	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	150	'REQUIRE_JS_PLAYER': False
	151	},
	152	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	153	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	154	'ios': {
	155	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	156	'INNERTUBE_CONTEXT': {
	157	'client': {
	158	'clientName': 'IOS',
	159	'clientVersion': '16.46',
	160	'deviceModel': 'iPhone14,3',
	161	}
	162	},
	163	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	164	'REQUIRE_JS_PLAYER': False
	165	},
	166	'ios_embedded': {
	167	'INNERTUBE_CONTEXT': {
	168	'client': {
	169	'clientName': 'IOS_MESSAGES_EXTENSION',
	170	'clientVersion': '16.46',
	171	'deviceModel': 'iPhone14,3',
	172	},
	173	},
	174	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	175	'REQUIRE_JS_PLAYER': False
	176	},
	177	'ios_music': {
	178	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	179	'INNERTUBE_CONTEXT': {
	180	'client': {
	181	'clientName': 'IOS_MUSIC',
	182	'clientVersion': '4.57',
	183	},
	184	},
	185	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	186	'REQUIRE_JS_PLAYER': False
	187	},
	188	'ios_creator': {
	189	'INNERTUBE_CONTEXT': {
	190	'client': {
	191	'clientName': 'IOS_CREATOR',
	192	'clientVersion': '21.47',
	193	},
	194	},
	195	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	196	'REQUIRE_JS_PLAYER': False
	197	},
	198	# mweb has 'ultralow' formats
	199	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	200	'mweb': {
	201	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	202	'INNERTUBE_CONTEXT': {
	203	'client': {
	204	'clientName': 'MWEB',
	205	'clientVersion': '2.20211221.01.00',
	206	}
	207	},
	208	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	209	},
	210	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	211	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	212	'tv_embedded': {
	213	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	214	'INNERTUBE_CONTEXT': {
	215	'client': {
	216	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	217	'clientVersion': '2.0',
	218	},
	219	},
	220	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	221	},
	222	}
	223
	224
	225	def _split_innertube_client(client_name):
	226	variant, *base = client_name.rsplit('.', 1)
	227	if base:
	228	return variant, base[0], variant
	229	base, *variant = client_name.split('_', 1)
	230	return client_name, base, variant[0] if variant else None
	231
	232
	233	def build_innertube_clients():
	234	THIRD_PARTY = {
	235	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	236	}
	237	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	238	priority = qualities(BASE_CLIENTS[::-1])
	239
	240	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	241	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	242	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	243	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	244	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	245
	246	_, base_client, variant = _split_innertube_client(client)
	247	ytcfg['priority'] = 10 * priority(base_client)
	248
	249	if not variant:
	250	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	251	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	252	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	253	embedscreen['priority'] -= 3
	254	elif variant == 'embedded':
	255	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	256	ytcfg['priority'] -= 2
	257	else:
	258	ytcfg['priority'] -= 3
	259
	260
	261	build_innertube_clients()
	262
	263
	264	class YoutubeBaseInfoExtractor(InfoExtractor):
	265	"""Provide base functions for Youtube extractors"""
	266
	267	_RESERVED_NAMES = (
	268	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	269	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	270	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	271	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	272
	273	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	274
	275	# _NETRC_MACHINE = 'youtube'
	276
	277	# If True it will raise an error if no login info is provided
	278	_LOGIN_REQUIRED = False
	279
	280	_INVIDIOUS_SITES = (
	281	# invidious-redirect websites
	282	r'(?:www\.)?redirect\.invidious\.io',
	283	r'(?:(?:www\|dev)\.)?invidio\.us',
	284	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	285	r'(?:www\.)?invidious\.pussthecat\.org',
	286	r'(?:www\.)?invidious\.zee\.li',
	287	r'(?:www\.)?invidious\.ethibox\.fr',
	288	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	289	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	290	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	291	# youtube-dl invidious instances list
	292	r'(?:(?:www\|no)\.)?invidiou\.sh',
	293	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	294	r'(?:www\.)?invidious\.kabi\.tk',
	295	r'(?:www\.)?invidious\.mastodon\.host',
	296	r'(?:www\.)?invidious\.zapashcanon\.fr',
	297	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	298	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	299	r'(?:www\.)?invidious\.himiko\.cloud',
	300	r'(?:www\.)?invidious\.reallyancient\.tech',
	301	r'(?:www\.)?invidious\.tube',
	302	r'(?:www\.)?invidiou\.site',
	303	r'(?:www\.)?invidious\.site',
	304	r'(?:www\.)?invidious\.xyz',
	305	r'(?:www\.)?invidious\.nixnet\.xyz',
	306	r'(?:www\.)?invidious\.048596\.xyz',
	307	r'(?:www\.)?invidious\.drycat\.fr',
	308	r'(?:www\.)?inv\.skyn3t\.in',
	309	r'(?:www\.)?tube\.poal\.co',
	310	r'(?:www\.)?tube\.connect\.cafe',
	311	r'(?:www\.)?vid\.wxzm\.sx',
	312	r'(?:www\.)?vid\.mint\.lgbt',
	313	r'(?:www\.)?vid\.puffyan\.us',
	314	r'(?:www\.)?yewtu\.be',
	315	r'(?:www\.)?yt\.elukerio\.org',
	316	r'(?:www\.)?yt\.lelux\.fi',
	317	r'(?:www\.)?invidious\.ggc-project\.de',
	318	r'(?:www\.)?yt\.maisputain\.ovh',
	319	r'(?:www\.)?ytprivate\.com',
	320	r'(?:www\.)?invidious\.13ad\.de',
	321	r'(?:www\.)?invidious\.toot\.koeln',
	322	r'(?:www\.)?invidious\.fdn\.fr',
	323	r'(?:www\.)?watch\.nettohikari\.com',
	324	r'(?:www\.)?invidious\.namazso\.eu',
	325	r'(?:www\.)?invidious\.silkky\.cloud',
	326	r'(?:www\.)?invidious\.exonip\.de',
	327	r'(?:www\.)?invidious\.riverside\.rocks',
	328	r'(?:www\.)?invidious\.blamefran\.net',
	329	r'(?:www\.)?invidious\.moomoo\.de',
	330	r'(?:www\.)?ytb\.trom\.tf',
	331	r'(?:www\.)?yt\.cyberhost\.uk',
	332	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	333	r'(?:www\.)?qklhadlycap4cnod\.onion',
	334	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	335	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	336	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	337	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	338	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	339	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	340	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	341	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	342	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	343	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	344	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	345	r'(?:www\.)?piped\.kavin\.rocks',
	346	r'(?:www\.)?piped\.silkky\.cloud',
	347	r'(?:www\.)?piped\.tokhmi\.xyz',
	348	r'(?:www\.)?piped\.moomoo\.me',
	349	r'(?:www\.)?il\.ax',
	350	r'(?:www\.)?piped\.syncpundit\.com',
	351	r'(?:www\.)?piped\.mha\.fi',
	352	r'(?:www\.)?piped\.mint\.lgbt',
	353	r'(?:www\.)?piped\.privacy\.com\.de',
	354	)
	355
	356	def _initialize_consent(self):
	357	cookies = self._get_cookies('https://www.youtube.com/')
	358	if cookies.get('__Secure-3PSID'):
	359	return
	360	consent_id = None
	361	consent = cookies.get('CONSENT')
	362	if consent:
	363	if 'YES' in consent.value:
	364	return
	365	consent_id = self._search_regex(
	366	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	367	if not consent_id:
	368	consent_id = random.randint(100, 999)
	369	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	370
	371	def _initialize_pref(self):
	372	cookies = self._get_cookies('https://www.youtube.com/')
	373	pref_cookie = cookies.get('PREF')
	374	pref = {}
	375	if pref_cookie:
	376	try:
	377	pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
	378	except ValueError:
	379	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	380	pref.update({'hl': 'en', 'tz': 'UTC'})
	381	self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
	382
	383	def _real_initialize(self):
	384	self._initialize_pref()
	385	self._initialize_consent()
	386	self._check_login_required()
	387
	388	def _check_login_required(self):
	389	if self._LOGIN_REQUIRED and not self._cookies_passed:
	390	self.raise_login_required('Login details are needed to download this content', method='cookies')
	391
	392	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	393	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	394
	395	def _get_default_ytcfg(self, client='web'):
	396	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	397
	398	def _get_innertube_host(self, client='web'):
	399	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	400
	401	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	402	# try_get but with fallback to default ytcfg client values when present
	403	_func = lambda y: try_get(y, getter, expected_type)
	404	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	405
	406	def _extract_client_name(self, ytcfg, default_client='web'):
	407	return self._ytcfg_get_safe(
	408	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	409	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
	410
	411	def _extract_client_version(self, ytcfg, default_client='web'):
	412	return self._ytcfg_get_safe(
	413	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	414	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
	415
	416	def _select_api_hostname(self, req_api_hostname, default_client=None):
	417	return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
	418	or req_api_hostname or self._get_innertube_host(default_client or 'web'))
	419
	420	def _extract_api_key(self, ytcfg=None, default_client='web'):
	421	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
	422
	423	def _extract_context(self, ytcfg=None, default_client='web'):
	424	context = get_first(
	425	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	426	# Enforce language and tz for extraction
	427	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	428	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	429	return context
	430
	431	_SAPISID = None
	432
	433	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	434	time_now = round(time.time())
	435	if self._SAPISID is None:
	436	yt_cookies = self._get_cookies('https://www.youtube.com')
	437	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	438	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	439	sapisid_cookie = dict_get(
	440	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	441	if sapisid_cookie and sapisid_cookie.value:
	442	self._SAPISID = sapisid_cookie.value
	443	self.write_debug('Extracted SAPISID cookie')
	444	# SAPISID cookie is required if not already present
	445	if not yt_cookies.get('SAPISID'):
	446	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	447	self._set_cookie(
	448	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	449	else:
	450	self._SAPISID = False
	451	if not self._SAPISID:
	452	return None
	453	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	454	sapisidhash = hashlib.sha1(
	455	f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
	456	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	457
	458	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	459	note='Downloading API JSON', errnote='Unable to download API page',
	460	context=None, api_key=None, api_hostname=None, default_client='web'):
	461
	462	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	463	data.update(query)
	464	real_headers = self.generate_api_headers(default_client=default_client)
	465	real_headers.update({'content-type': 'application/json'})
	466	if headers:
	467	real_headers.update(headers)
	468	api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
	469	or api_key or self._extract_api_key(default_client=default_client))
	470	return self._download_json(
	471	f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
	472	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	473	data=json.dumps(data).encode('utf8'), headers=real_headers,
	474	query={'key': api_key, 'prettyPrint': 'false'})
	475
	476	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	477	return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
	478
	479	@staticmethod
	480	def _extract_session_index(*data):
	481	"""
	482	Index of current account in account list.
	483	See: https://github.com/yt-dlp/yt-dlp/pull/519
	484	"""
	485	for ytcfg in data:
	486	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	487	if session_index is not None:
	488	return session_index
	489
	490	# Deprecated?
	491	def _extract_identity_token(self, ytcfg=None, webpage=None):
	492	if ytcfg:
	493	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
	494	if token:
	495	return token
	496	if webpage:
	497	return self._search_regex(
	498	r'\bID_TOKEN["\']\s:\s["\'](.+?)["\']', webpage,
	499	'identity token', default=None, fatal=False)
	500

1

import base64

import calendar

import copy

import datetime

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

20

from ..compat import functools

21

from ..jsinterp import JSInterpreter

22

from ..utils import (

NO_DEFAULT,

ExtractorError,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

66

INNERTUBE_CLIENTS = {

67

'web': {

68

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

69

'INNERTUBE_CONTEXT': {

70

'client': {

71

'clientName': 'WEB',

72

'clientVersion': '2.20211221.00.00',

73

}

74

},

75

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

76

},

77

'web_embedded': {

78

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

79

'INNERTUBE_CONTEXT': {

80

'client': {

81

'clientName': 'WEB_EMBEDDED_PLAYER',

82

'clientVersion': '1.20211215.00.01',

83

},

84

},

85

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

86

},

87

'web_music': {

88

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

89

'INNERTUBE_HOST': 'music.youtube.com',

90

'INNERTUBE_CONTEXT': {

91

'client': {

92

'clientName': 'WEB_REMIX',

93

'clientVersion': '1.20211213.00.00',

94

}

95

},

96

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

97

},

98

'web_creator': {

99

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

100

'INNERTUBE_CONTEXT': {

101

'client': {

102

'clientName': 'WEB_CREATOR',

103

'clientVersion': '1.20211220.02.00',

104

}

105

},

106

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

107

},

108

'android': {

109

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

110

'INNERTUBE_CONTEXT': {

111

'client': {

112

'clientName': 'ANDROID',

113

'clientVersion': '16.49',

114

}

115

},

116

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

117

'REQUIRE_JS_PLAYER': False

118

},

119

'android_embedded': {

120

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

121

'INNERTUBE_CONTEXT': {

122

'client': {

123

'clientName': 'ANDROID_EMBEDDED_PLAYER',

124

'clientVersion': '16.49',

125

},

126

},

127

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

128

'REQUIRE_JS_PLAYER': False

129

},

130

'android_music': {

131

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

132

'INNERTUBE_CONTEXT': {

133

'client': {

134

'clientName': 'ANDROID_MUSIC',

135

'clientVersion': '4.57',

136

}

137

},

138

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

139

'REQUIRE_JS_PLAYER': False

140

},

141

'android_creator': {

142

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

143

'INNERTUBE_CONTEXT': {

144

'client': {

145

'clientName': 'ANDROID_CREATOR',

146

'clientVersion': '21.47',

147

},

148

},

149

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

150

'REQUIRE_JS_PLAYER': False

151

},

152

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

153

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

154

'ios': {

155

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

156

'INNERTUBE_CONTEXT': {

157

'client': {

158

'clientName': 'IOS',

159

'clientVersion': '16.46',

160

'deviceModel': 'iPhone14,3',

161

}

162

},

163

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

164

'REQUIRE_JS_PLAYER': False

165

},

166

'ios_embedded': {

167

'INNERTUBE_CONTEXT': {

168

'client': {

169

'clientName': 'IOS_MESSAGES_EXTENSION',

170

'clientVersion': '16.46',

171

'deviceModel': 'iPhone14,3',

172

},

173

},

174

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

175

'REQUIRE_JS_PLAYER': False

176

},

177

'ios_music': {

178

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

179

'INNERTUBE_CONTEXT': {

180

'client': {

181

'clientName': 'IOS_MUSIC',

182

'clientVersion': '4.57',

183

},

184

},

185

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

186

'REQUIRE_JS_PLAYER': False

187

},

188

'ios_creator': {

189

'INNERTUBE_CONTEXT': {

190

'client': {

191

'clientName': 'IOS_CREATOR',

192

'clientVersion': '21.47',

193

},

194

},

195

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

196

'REQUIRE_JS_PLAYER': False

197

},

198

# mweb has 'ultralow' formats

199

# See: https://github.com/yt-dlp/yt-dlp/pull/557

200

'mweb': {

201

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

202

'INNERTUBE_CONTEXT': {

203

'client': {

204

'clientName': 'MWEB',

205

'clientVersion': '2.20211221.01.00',

206

}

207

},

208

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

209

},

210

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

211

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

212

'tv_embedded': {

213

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

214

'INNERTUBE_CONTEXT': {

215

'client': {

216

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

217

'clientVersion': '2.0',

218

},

219

},

220

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

226

variant, *base = client_name.rsplit('.', 1)

227

if base:

228

return variant, base[0], variant

229

base, *variant = client_name.split('_', 1)

230

return client_name, base, variant[0] if variant else None

231

232

233

def build_innertube_clients():

234

THIRD_PARTY = {

235

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

236

}

237

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

238

priority = qualities(BASE_CLIENTS[::-1])

239

240

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

241

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

242

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

243

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

244

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

245

246

_, base_client, variant = _split_innertube_client(client)

247

ytcfg['priority'] = 10 * priority(base_client)

248

249

if not variant:

250

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

251

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

252

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

253

embedscreen['priority'] -= 3

254

elif variant == 'embedded':

255

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

256

ytcfg['priority'] -= 2

257

else:

258

ytcfg['priority'] -= 3

259

260

261

build_innertube_clients()

262

263

264

class YoutubeBaseInfoExtractor(InfoExtractor):

265

"""Provide base functions for Youtube extractors"""

266

267

_RESERVED_NAMES = (

268

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

274

275

# _NETRC_MACHINE = 'youtube'

276

277

# If True it will raise an error if no login info is provided

278

_LOGIN_REQUIRED = False

279

280

_INVIDIOUS_SITES = (

281

# invidious-redirect websites

282

r'(?:www\.)?redirect\.invidious\.io',

283

r'(?:(?:www|dev)\.)?invidio\.us',

284

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

285

r'(?:www\.)?invidious\.pussthecat\.org',

286

r'(?:www\.)?invidious\.zee\.li',

287

r'(?:www\.)?invidious\.ethibox\.fr',

288

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

289

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

290

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

291

# youtube-dl invidious instances list

292

r'(?:(?:www|no)\.)?invidiou\.sh',

293

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

294

r'(?:www\.)?invidious\.kabi\.tk',

295

r'(?:www\.)?invidious\.mastodon\.host',

296

r'(?:www\.)?invidious\.zapashcanon\.fr',

297

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

298

r'(?:www\.)?invidious\.tinfoil-hat\.net',

299

r'(?:www\.)?invidious\.himiko\.cloud',

300

r'(?:www\.)?invidious\.reallyancient\.tech',

301

r'(?:www\.)?invidious\.tube',

302

r'(?:www\.)?invidiou\.site',

303

r'(?:www\.)?invidious\.site',

304

r'(?:www\.)?invidious\.xyz',

305

r'(?:www\.)?invidious\.nixnet\.xyz',

306

r'(?:www\.)?invidious\.048596\.xyz',

307

r'(?:www\.)?invidious\.drycat\.fr',

308

r'(?:www\.)?inv\.skyn3t\.in',

309

r'(?:www\.)?tube\.poal\.co',

310

r'(?:www\.)?tube\.connect\.cafe',

311

r'(?:www\.)?vid\.wxzm\.sx',

312

r'(?:www\.)?vid\.mint\.lgbt',

313

r'(?:www\.)?vid\.puffyan\.us',

314

r'(?:www\.)?yewtu\.be',

315

r'(?:www\.)?yt\.elukerio\.org',

316

r'(?:www\.)?yt\.lelux\.fi',

317

r'(?:www\.)?invidious\.ggc-project\.de',

318

r'(?:www\.)?yt\.maisputain\.ovh',

319

r'(?:www\.)?ytprivate\.com',

320

r'(?:www\.)?invidious\.13ad\.de',

321

r'(?:www\.)?invidious\.toot\.koeln',

322

r'(?:www\.)?invidious\.fdn\.fr',

323

r'(?:www\.)?watch\.nettohikari\.com',

324

r'(?:www\.)?invidious\.namazso\.eu',

325

r'(?:www\.)?invidious\.silkky\.cloud',

326

r'(?:www\.)?invidious\.exonip\.de',

327

r'(?:www\.)?invidious\.riverside\.rocks',

328

r'(?:www\.)?invidious\.blamefran\.net',

329

r'(?:www\.)?invidious\.moomoo\.de',

330

r'(?:www\.)?ytb\.trom\.tf',

331

r'(?:www\.)?yt\.cyberhost\.uk',

332

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

333

r'(?:www\.)?qklhadlycap4cnod\.onion',

334

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

335

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

336

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

337

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

338

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

339

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

340

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

341

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

342

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

343

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

344

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

345

r'(?:www\.)?piped\.kavin\.rocks',

346

r'(?:www\.)?piped\.silkky\.cloud',

347

r'(?:www\.)?piped\.tokhmi\.xyz',

348

r'(?:www\.)?piped\.moomoo\.me',

349

r'(?:www\.)?il\.ax',

350

r'(?:www\.)?piped\.syncpundit\.com',

351

r'(?:www\.)?piped\.mha\.fi',

352

r'(?:www\.)?piped\.mint\.lgbt',

353

r'(?:www\.)?piped\.privacy\.com\.de',

354

)

355

356

def _initialize_consent(self):

357

cookies = self._get_cookies('https://www.youtube.com/')

358

if cookies.get('__Secure-3PSID'):

359

return

360

consent_id = None

361

consent = cookies.get('CONSENT')

362

if consent:

363

if 'YES' in consent.value:

364

return

365

consent_id = self._search_regex(

366

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

367

if not consent_id:

368

consent_id = random.randint(100, 999)

369

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

370

371

def _initialize_pref(self):

372

cookies = self._get_cookies('https://www.youtube.com/')

373

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(urllib.parse.parse_qsl(pref_cookie.value))

378

except ValueError:

379

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

380

pref.update({'hl': 'en', 'tz': 'UTC'})

381

self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))

382

383

def _real_initialize(self):

384

self._initialize_pref()

385

self._initialize_consent()

386

self._check_login_required()

387

388

def _check_login_required(self):

389

if self._LOGIN_REQUIRED and not self._cookies_passed:

390

self.raise_login_required('Login details are needed to download this content', method='cookies')

391

392

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

393

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

394

395

def _get_default_ytcfg(self, client='web'):

396

return copy.deepcopy(INNERTUBE_CLIENTS[client])

397

398

def _get_innertube_host(self, client='web'):

399

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

400

401

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

402

# try_get but with fallback to default ytcfg client values when present

403

_func = lambda y: try_get(y, getter, expected_type)

404

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

405

406

def _extract_client_name(self, ytcfg, default_client='web'):

407

return self._ytcfg_get_safe(

408

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

409

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)

410

411

def _extract_client_version(self, ytcfg, default_client='web'):

412

return self._ytcfg_get_safe(

413

ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],

414

lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)

415

416

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Choose the API hostname.

    Precedence: the ``innertube_host`` extractor argument, then
    ``req_api_hostname``, then the innertube host of ``default_client``
    (``'web'`` when no client is given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

419

420

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return ``INNERTUBE_API_KEY`` from ``ytcfg`` or from the default client config."""
    def api_key_of(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_of, str, default_client)

422

423

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the ``INNERTUBE_CONTEXT`` dict with language and timezone forced.

    The context is taken from ``ytcfg`` when it has one, otherwise from the
    default config of ``default_client``.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section['hl'] = 'en'
    client_section['timeZone'] = 'UTC'
    client_section['utcOffsetMinutes'] = 0
    return context

_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

434

time_now = round(time.time())

435

if self._SAPISID is None:

436

yt_cookies = self._get_cookies('https://www.youtube.com')

437

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

438

# See: https://github.com/yt-dlp/yt-dlp/issues/393

439

sapisid_cookie = dict_get(

440

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

441

if sapisid_cookie and sapisid_cookie.value:

442

self._SAPISID = sapisid_cookie.value

443

self.write_debug('Extracted SAPISID cookie')

444

# SAPISID cookie is required if not already present

445

if not yt_cookies.get('SAPISID'):

446

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

447

self._set_cookie(

448

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

449

else:

450

self._SAPISID = False

451

if not self._SAPISID:

452

return None

453

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

454

sapisidhash = hashlib.sha1(

455

f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()

456

return f'SAPISIDHASH {time_now}_{sapisidhash}'

457

458

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST ``query`` as JSON to the ``/youtubei/v1/<ep>`` endpoint and return the result.

    ``context`` defaults to the context of ``default_client``. The API key
    is taken from the ``innertube_key`` extractor argument, then from
    ``api_key``, then from the default client config. Entries in ``headers``
    override the generated API headers.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})

475

476

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search ``webpage`` for the JSON that follows the ``ytInitialData`` assignment."""
    pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(pattern, webpage, 'yt initial data', item_id, fatal=fatal)

478

479

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first ``SESSION_INDEX`` among ``data`` that converts to an
    int, or None if there is none.
    """
    candidates = (int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX'])) for cfg in data)
    return next((index for index in candidates if index is not None), None)

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the ``ID_TOKEN`` string from ``ytcfg``, else search ``webpage`` for it.

    Returns None when neither source yields a token.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

500

501

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'])
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated
        datasync_id = try_get(source, datasync_getters, str) or ''
        channel_syncid, *remainder = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if remainder and remainder[0]:
            return channel_syncid

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value, branched over by traverse-style lookup
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)

529

530

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated (cached)."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

533

534

def extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to ``ytcfg.set(...)`` in ``webpage``.

    Returns the parsed dict, or ``{}`` when ``webpage`` is empty, no
    ``ytcfg.set({...});`` call is found, or the JSON cannot be parsed.
    """
    if not webpage:
        return {}
    # The call's parentheses must be escaped so they match literally;
    # the unescaped braces delimit the captured JSON object.
    ytcfg_json = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(ytcfg_json, video_id, fatal=False) or {}

541

542

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an API request.

    Explicit keyword arguments take precedence over values extracted from
    ``ytcfg``. Headers whose value is None are left out of the result.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    candidate_headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Fall back to account index 0 when only a sync id is known
        candidate_headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        candidate_headers['Authorization'] = authorization
        candidate_headers['X-Origin'] = origin

    return {name: value for name, value in candidate_headers.items() if value is not None}

566

567

def _download_ytcfg(self, client, video_id):

568

url = {

569

'web': 'https://www.youtube.com',

570

'web_music': 'https://music.youtube.com',

571

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(

576

url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')

577

return self.extract_ytcfg(video_id, webpage) or {}

578

579

@staticmethod

580

def _build_api_continuation_query(continuation, ctp=None):

581

query = {

582

'continuation': continuation

583

}

584

# TODO: Inconsistency with clickTrackingParams.

585

# Currently we have a fixed ctp contained within context (from ytcfg)

586

# and a ctp in root query for continuation.

587

if ctp:

588

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from ``nextContinuationData`` / ``reloadContinuationData``.

    Returns None when the renderer has neither, or the data has no
    ``continuation`` token.
    """
    data_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'])
    continuation_data = try_get(renderer, data_getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

603

604

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when ``continuation_ep`` is not a dict or carries no
    ``continuationCommand`` token string.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

613

614

@classmethod
def _extract_continuation(cls, renderer):
    """Find a continuation query for ``renderer``.

    The renderer's own continuation data is tried first; otherwise the
    entries under ``contents`` and ``items`` are scanned for a
    ``continuationItemRenderer``. Returns None when nothing is found.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x, key=key: x[key], list) or []]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'])
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(entry, endpoint_getters, dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query

@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` for each alert under ``data['alerts']``.

    Alerts without a ``type`` or without message text are skipped, as are
    list entries that are not dicts.
    """
    alert_groups = try_get(data, lambda x: x['alerts'], list) or []
    for group in alert_groups:
        if isinstance(group, dict):
            for alert in group.values():
                alert_type = alert.get('type')
                message = cls._get_text(alert, 'text') if alert_type else None
                if message:
                    yield alert_type, message

647

648

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

649

errors = []

650

warnings = []

651

for alert_type, alert_message in alerts:

652

if alert_type.lower() == 'error' and fatal:

653

errors.append([alert_type, alert_message])

654

else:

655

warnings.append([alert_type, alert_message])

656

657

for alert_type, alert_message in (warnings + errors[:-1]):

658

self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

659

if errors:

660

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

661

662

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts in ``data`` and pass them to ``_report_alerts`` with the extra arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

664

665

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased ``metadataBadgeRenderer`` labels under ``renderer['badges']``."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of the given paths.

    For each candidate object, ``simpleText`` is preferred; otherwise the
    ``text`` fields of its ``runs`` (or of the object itself, if it is a
    list) are joined, limited to the first ``max_runs`` runs when given.
    With no paths, ``data`` itself is examined. Returns None when no text
    is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # A path without branching keys yields a single object; wrap it
            # so that both cases can be iterated alike
            branches = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branches:
                candidates = [candidates]
        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text
            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined

def _get_count(self, data, *path_list):
    """Parse a count from the text found at the given paths.

    ``parse_count`` is tried first; if it yields None, the leading run of
    digits and commas (after removing whitespace) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed
    compact_text = re.sub(r'\s', '', count_text)
    leading_number = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_number)

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.split('?')[0]
            results.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results

@staticmethod

def extract_relative_time(relative_time_text):

729

"""

730

Extracts a relative time from string and converts to dt object

731

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

"""

if mobj:

start = mobj.group('start')

736

if start:

737

return datetime_from_str(start)

738

try:

739

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _extract_time_text(self, renderer, *path_list):
    """@returns (timestamp, time_text)

    The text is first read as a relative time; failing that, as a date,
    either directly or from a date-like fragment of the lower-cased text.
    A warning is reported once when non-empty text yields no timestamp.
    """
    text = self._get_text(renderer, *path_list) or ''
    relative_dt = self.extract_relative_time(text)

    if isinstance(relative_dt, datetime.datetime):
        timestamp = calendar.timegm(relative_dt.timetuple())
    else:
        timestamp = None

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_fragment = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_fragment)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text

761

762

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API endpoint ``ep`` with retries and return the JSON response.

    An attempt is retried (up to the ``extractor_retries`` param, default 3)
    on network errors other than HTTP 403/429, on an "unknown error" alert
    in the response, and when none of ``check_get_keys`` is present in the
    response. Once retries are exhausted, or on any other error, the error
    is raised if ``fatal`` is set; otherwise a warning is reported and None
    is returned.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                if isinstance(e.cause, urllib.error.HTTPError):
                    # Surface the error message from a JSON error body, if any
                    first_bytes = e.cause.read(512)
                    if not is_html(first_bytes):
                        yt_error = try_get(
                            self._parse_json(
                                self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                            lambda x: x['error']['message'], str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                # Only retry while attempts remain; on the last attempt the
                # error must be raised/reported rather than silently
                # returning the errored response.
                if 'unknown error' in e.msg.lower() and count < retries:
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response

@staticmethod

def is_music_url(url):

836

return re.match(r'https?://music\.youtube\.com/', url) is not None

837

838

def _extract_video(self, renderer):
    """Build a ``url``-type result dict for the video described by ``renderer``.

    The result points at the ``/shorts/`` URL when the thumbnail overlay
    style is ``SHORTS`` or the navigation URL contains ``/shorts/``, and at
    the regular watch URL otherwise. ``upload_date`` is only filled in when
    the ``approximate_date`` extractor argument is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(duration_text)
    if duration is None:
        # Fall back to the duration embedded in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')
    else:
        upload_date = None

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    availability = self._availability(
        needs_premium='premium' in badges, needs_subscription='members only' in badges)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

895

IE_DESC = 'YouTube'

896

_VALID_URL = r"""(?x)^

897

(

898

(?:https?://|//) # http(s):// or protocol-independent URL

899

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

900

(?:www\.)?deturl\.com/www\.youtube\.com|

901

(?:www\.)?pwnyoutube\.com|

902

(?:www\.)?hooktube\.com|

903

(?:www\.)?yourepeat\.com|

904

tube\.majestyc\.net|

905

%(invidious)s|

906

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

907

(?:.*?\#/)? # handle anchor (#/) redirect urls

908

(?: # the various things that can precede the ID:

909

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

910

|(?: # or the v= param in all its forms

911

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

912

(?:\?|\#!?) # the params delimiter ? or # or #!

913

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

919

vid\.plus| # or vid.plus/xxxx

920

zwearz\.com/watch| # or zwearz.com/watch/xxxx

921

%(invidious)s

922

)/

923

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

924

)

925

)? # all until now is optional -> you can pass the naked ID

926

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

927

(?(1).+)? # if we found the ID, everything can follow

928

(?:\#|$)""" % {

929

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

930

}

931

_PLAYER_INFO_RE = (

932

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

933

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

934

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

935

)

936

_formats = {

937

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

938

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

939

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

940

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

941

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

942

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

943

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

944

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

945

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

946

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

947

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

948

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

949

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

950

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

951

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

952

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

953

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

954

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

959

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

960

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

961

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

962

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

963

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

964

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

965

966

# Apple HTTP Live Streaming

967

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

968

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

969

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

970

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

971

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

972

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

973

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

974

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

975

976

# DASH mp4 video

977

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

978

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

979

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

980

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

981

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

982

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

983

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

984

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

985

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

986

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

987

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

988

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

989

990

# Dash mp4 audio

991

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

992

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

993

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

994

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

995

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

996

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

997

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

998

999

# Dash webm

1000

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1001

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1002

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1003

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1004

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1005

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1006

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1007

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1008

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1009

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1010

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1011

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1012

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1013

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1014

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1015

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1016

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1017

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1018

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1019

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1020

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1021

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1022

1023

# Dash webm audio

1024

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1025

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1026

1027

# Dash webm audio with opus inside

1028

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1029

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1030

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1031

1032

# RTMP (unnamed)

1033

'_rtmp': {'protocol': 'rtmp'},

1034

1035

# av01 video only formats sometimes served with "unknown" codecs

1036

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1037

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1038

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1039

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1040

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1041

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1042

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1043

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1044

}

1045

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1057

'uploader': 'Philipp Hagemeister',

1058

'uploader_id': 'phihag',

1059

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1060

'channel': 'Philipp Hagemeister',

1061

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1062

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1063

'upload_date': '20121002',

1064

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1065

'categories': ['Science & Technology'],

1066

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1071

'playable_in_embed': True,

1072

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1073

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1082

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1087

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1088

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1089

'uploader': 'SET India',

1090

'uploader_id': 'setindia',

1091

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1092

'age_limit': 18,

1093

},

1094

'skip': 'Private video',

1095

},

1096

{

1097

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1098

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1103

'uploader': 'Philipp Hagemeister',

1104

'uploader_id': 'phihag',

1105

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1106

'channel': 'Philipp Hagemeister',

1107

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1108

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1109

'upload_date': '20121002',

1110

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1111

'categories': ['Science & Technology'],

1112

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1117

'playable_in_embed': True,

1118

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1119

'live_status': 'not_live',

1120

'age_limit': 0,

1121

'channel_follower_count': int

1122

},

1123

'params': {

1124

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1129

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1134

'uploader_id': '8KVIDEO',

1135

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1136

'description': '',

1137

'uploader': '8KVIDEO',

1138

'title': 'UHDTV TEST 8K VIDEO.mp4'

1139

},

1140

'params': {

1141

'youtube_include_dash_manifest': True,

1142

'format': '141',

1143

},

1144

'skip': 'format 141 not served anymore',

1145

},

1146

# DASH manifest with encrypted signature

1147

{

1148

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1153

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1154

'duration': 244,

1155

'uploader': 'AfrojackVEVO',

1156

'uploader_id': 'AfrojackVEVO',

1157

'upload_date': '20131011',

1158

'abr': 129.495,

1159

'like_count': int,

1160

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1161

'playable_in_embed': True,

1162

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1163

'view_count': int,

1164

'track': 'The Spark',

1165

'live_status': 'not_live',

1166

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1167

'channel': 'Afrojack',

1168

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1169

'tags': 'count:19',

1170

'availability': 'public',

1171

'categories': ['Music'],

1172

'age_limit': 0,

1173

'alt_title': 'The Spark',

1174

'channel_follower_count': int

1175

},

1176

'params': {

1177

'youtube_include_dash_manifest': True,

1178

'format': '141/bestaudio[ext=m4a]',

1179

},

1180

},

1181

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1182

{

1183

'note': 'Embed allowed age-gate video',

1184

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1189

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1190

'duration': 142,

1191

'uploader': 'The Witcher',

1192

'uploader_id': 'WitcherGame',

1193

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1194

'upload_date': '20140605',

1195

'age_limit': 18,

1196

'categories': ['Gaming'],

1197

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1198

'availability': 'needs_auth',

1199

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1200

'like_count': int,

1201

'channel': 'The Witcher',

1202

'live_status': 'not_live',

1203

'tags': 'count:17',

1204

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1205

'playable_in_embed': True,

1206

'view_count': int,

1207

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1212

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1217

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1218

'upload_date': '20200408',

1219

'uploader_id': 'FlyingKitty900',

1220

'uploader': 'FlyingKitty',

1221

'age_limit': 18,

1222

'availability': 'needs_auth',

1223

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1224

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1225

'channel': 'FlyingKitty',

1226

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1227

'view_count': int,

1228

'categories': ['Entertainment'],

1229

'live_status': 'not_live',

1230

'tags': ['Flyingkitty', 'godzilla 2'],

1231

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1232

'like_count': int,

1233

'duration': 177,

1234

'playable_in_embed': True,

1235

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1240

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1241

'info_dict': {

1242

'id': 'Tq92D6wQ1mg',

1243

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1244

'ext': 'mp4',

1245

'upload_date': '20191228',

1246

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1247

'uploader': 'Projekt Melody',

1248

'description': 'md5:17eccca93a786d51bc67646756894066',

1249

'age_limit': 18,

1250

'like_count': int,

1251

'availability': 'needs_auth',

1252

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1253

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1254

'view_count': int,

1255

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1256

'channel': 'Projekt Melody',

1257

'live_status': 'not_live',

1258

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1259

'playable_in_embed': True,

1260

'categories': ['Entertainment'],

1261

'duration': 106,

1262

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1263

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1268

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1273

'uploader': 'Herr Lurik',

1274

'uploader_id': 'st3in234',

1275

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1276

'upload_date': '20130730',

1277

'track': 'Such mich find mich',

1278

'age_limit': 0,

1279

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1280

'like_count': int,

1281

'playable_in_embed': False,

1282

'creator': 'OOMPH!',

1283

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1284

'view_count': int,

1285

'alt_title': 'Such mich find mich',

1286

'duration': 210,

1287

'channel': 'Herr Lurik',

1288

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1289

'categories': ['Music'],

1290

'availability': 'public',

1291

'uploader_url': 'http://www.youtube.com/user/st3in234',

1292

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1293

'live_status': 'not_live',

1294

'artist': 'OOMPH!',

1295

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1300

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1301

'only_matching': True,

1302

},

1303

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1304

# YouTube Red ad is not captured for creator

1305

{

1306

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1312

'uploader_id': 'deadmau5',

1313

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1314

'creator': 'deadmau5',

1315

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1316

'uploader': 'deadmau5',

1317

'title': 'Deadmau5 - Some Chords (HD)',

1318

'alt_title': 'Some Chords',

1319

'availability': 'public',

1320

'tags': 'count:14',

1321

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1322

'view_count': int,

1323

'live_status': 'not_live',

1324

'channel': 'deadmau5',

1325

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1326

'like_count': int,

1327

'track': 'Some Chords',

1328

'artist': 'deadmau5',

1329

'playable_in_embed': True,

1330

'age_limit': 0,

1331

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1332

'categories': ['Music'],

1333

'album': 'Some Chords',

1334

'channel_follower_count': int

1335

},

1336

'expected_warnings': [

1337

'DASH manifest missing',

1338

]

1339

},

1340

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1341

{

1342

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1348

'uploader_id': 'olympic',

1349

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1350

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1351

'uploader': 'Olympics',

1352

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1353

'like_count': int,

1354

'release_timestamp': 1343767800,

1355

'playable_in_embed': True,

1356

'categories': ['Sports'],

1357

'release_date': '20120731',

1358

'channel': 'Olympics',

1359

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1360

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1361

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1362

'age_limit': 0,

1363

'availability': 'public',

1364

'live_status': 'was_live',

1365

'view_count': int,

1366

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1367

'channel_follower_count': int

1368

},

1369

'params': {

1370

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1380

'duration': 85,

1381

'upload_date': '20110310',

1382

'uploader_id': 'AllenMeow',

1383

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1384

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1385

'uploader': '孫ᄋᄅ',

1386

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1387

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1392

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1393

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1394

'view_count': int,

1395

'categories': ['People & Blogs'],

1396

'like_count': int,

1397

'live_status': 'not_live',

1398

'availability': 'unlisted',

1399

'channel_follower_count': int

1400

},

1401

},

1402

# url_encoded_fmt_stream_map is empty string

1403

{

1404

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1409

'description': '',

1410

'upload_date': '20150404',

1411

'uploader_id': 'spbelect',

1412

'uploader': 'Наблюдатели Петербурга',

1413

},

1414

'params': {

1415

'skip_download': 'requires avconv',

1416

},

1417

'skip': 'This live event has ended.',

1418

},

1419

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1420

{

1421

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1426

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1427

'duration': 220,

1428

'upload_date': '20150625',

1429

'uploader_id': 'dorappi2000',

1430

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1431

'uploader': 'dorappi2000',

1432

'formats': 'mincount:31',

1433

},

1434

'skip': 'not actual anymore',

1435

},

1436

# DASH manifest with segment_list

1437

{

1438

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1439

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1444

'uploader': 'Airtek',

1445

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1446

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1447

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1448

},

1449

'params': {

1450

'youtube_include_dash_manifest': True,

1451

'format': '135', # bestvideo

1452

},

1453

'skip': 'This live event has ended.',

1454

},

1455

{

1456

# Multifeed videos (multiple cameras), URL is for Main Camera

1457

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1458

'info_dict': {

1459

'id': 'jvGDaLqkpTg',

1460

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1461

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1468

'description': 'md5:e03b909557865076822aa169218d6a5d',

1469

'duration': 10643,

1470

'upload_date': '20161111',

1471

'uploader': 'Team PGP',

1472

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1473

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1480

'description': 'md5:e03b909557865076822aa169218d6a5d',

1481

'duration': 10991,

1482

'upload_date': '20161111',

1483

'uploader': 'Team PGP',

1484

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1485

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1492

'description': 'md5:e03b909557865076822aa169218d6a5d',

1493

'duration': 10995,

1494

'upload_date': '20161111',

1495

'uploader': 'Team PGP',

1496

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1497

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1504

'description': 'md5:e03b909557865076822aa169218d6a5d',

1505

'duration': 10990,

1506

'upload_date': '20161111',

1507

'uploader': 'Team PGP',

1508

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1509

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1514

},

1515

'skip': 'Not multifeed anymore',

1516

},

1517

{

1518

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1519

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1520

'info_dict': {

1521

'id': 'gVfLd0zydlo',

1522

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1523

},

1524

'playlist_count': 2,

1525

'skip': 'Not multifeed anymore',

1526

},

1527

{

1528

'url': 'https://vid.plus/FlRa-iH7PGw',

1529

'only_matching': True,

1530

},

1531

{

1532

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1533

'only_matching': True,

1534

},

1535

{

1536

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1537

# Also tests cut-off URL expansion in video description (see

1538

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1539

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1540

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1545

'alt_title': 'Dark Walk',

1546

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1547

'duration': 133,

1548

'upload_date': '20151119',

1549

'uploader_id': 'IronSoulElf',

1550

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1551

'uploader': 'IronSoulElf',

1552

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1553

'track': 'Dark Walk',

1554

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1555

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1556

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1557

'categories': ['Film & Animation'],

1558

'view_count': int,

1559

'live_status': 'not_live',

1560

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1561

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1562

'tags': 'count:13',

1563

'availability': 'public',

1564

'channel': 'IronSoulElf',

1565

'playable_in_embed': True,

1566

'like_count': int,

1567

'age_limit': 0,

1568

'channel_follower_count': int

1569

},

1570

'params': {

1571

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1576

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1577

'only_matching': True,

1578

},

1579

{

1580

# Video with yt:stretch=17:0

1581

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1586

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1587

'upload_date': '20151107',

1588

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1589

'uploader': 'CH GAMER DROID',

1590

},

1591

'params': {

1592

'skip_download': True,

1593

},

1594

'skip': 'This video does not exist.',

1595

},

1596

{

1597

# Video with incomplete 'yt:stretch=16:'

1598

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1599

'only_matching': True,

1600

},

1601

{

1602

# Video licensed under Creative Commons

1603

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1608

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1609

'duration': 721,

1610

'upload_date': '20150128',

1611

'uploader_id': 'BerkmanCenter',

1612

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1613

'uploader': 'The Berkman Klein Center for Internet & Society',

1614

'license': 'Creative Commons Attribution license (reuse allowed)',

1615

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1616

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1617

'like_count': int,

1618

'age_limit': 0,

1619

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1620

'channel': 'The Berkman Klein Center for Internet & Society',

1621

'availability': 'public',

1622

'view_count': int,

1623

'categories': ['Education'],

1624

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1625

'live_status': 'not_live',

1626

'playable_in_embed': True,

1627

'channel_follower_count': int

1628

},

1629

'params': {

1630

'skip_download': True,

},

},

{

# Channel-like uploader_url

1635

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1640

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1641

'duration': 4060,

1642

'upload_date': '20151120',

1643

'uploader': 'Bernie Sanders',

1644

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1645

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1646

'license': 'Creative Commons Attribution license (reuse allowed)',

1647

'playable_in_embed': True,

1648

'tags': 'count:12',

1649

'like_count': int,

1650

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1651

'age_limit': 0,

1652

'availability': 'public',

1653

'categories': ['News & Politics'],

1654

'channel': 'Bernie Sanders',

1655

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1656

'view_count': int,

1657

'live_status': 'not_live',

1658

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1659

'channel_follower_count': int

1660

},

1661

'params': {

1662

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1667

'only_matching': True,

1668

},

1669

{

1670

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1671

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1672

'only_matching': True,

1673

},

1674

{

1675

# Rental video preview

1676

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1681

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1682

'upload_date': '20150811',

1683

'uploader': 'FlixMatrix',

1684

'uploader_id': 'FlixMatrixKaravan',

1685

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1686

'license': 'Standard YouTube License',

1687

},

1688

'params': {

1689

'skip_download': True,

1690

},

1691

'skip': 'This video is not available.',

1692

},

1693

{

1694

# YouTube Red video with episode data

1695

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1700

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1701

'duration': 2085,

1702

'upload_date': '20170118',

1703

'uploader': 'Vsauce',

1704

'uploader_id': 'Vsauce',

1705

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1706

'series': 'Mind Field',

1707

'season_number': 1,

1708

'episode_number': 1,

1709

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1710

'tags': 'count:12',

1711

'view_count': int,

1712

'availability': 'public',

1713

'age_limit': 0,

1714

'channel': 'Vsauce',

1715

'episode': 'Episode 1',

1716

'categories': ['Entertainment'],

1717

'season': 'Season 1',

1718

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1719

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1720

'like_count': int,

1721

'playable_in_embed': True,

1722

'live_status': 'not_live',

1723

'channel_follower_count': int

1724

},

1725

'params': {

1726

'skip_download': True,

1727

},

1728

'expected_warnings': [

1729

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1734

# as inappropriate or offensive to some audiences.

1735

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1740

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1741

'duration': 965,

1742

'upload_date': '20140124',

1743

'uploader': 'New Century Foundation',

1744

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1745

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1746

},

1747

'params': {

1748

'skip_download': True,

1749

},

1750

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1755

'only_matching': True,

1756

},

1757

{

1758

# geo restricted to JP

1759

'url': 'sJL6WA-aGkQ',

1760

'only_matching': True,

1761

},

1762

{

1763

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1764

'only_matching': True,

1765

},

1766

{

1767

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1768

'only_matching': True,

1769

},

1770

{

1771

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1772

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1773

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1778

'only_matching': True,

1779

},

1780

{

1781

# Video with unsupported adaptive stream type formats

1782

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1787

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1788

'duration': 433,

1789

'upload_date': '20130923',

1790

'uploader': 'Amelia Putri Harwita',

1791

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1792

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1793

'formats': 'maxcount:10',

1794

},

1795

'params': {

1796

'skip_download': True,

1797

'youtube_include_dash_manifest': False,

1798

},

1799

'skip': 'not actual anymore',

1800

},

1801

{

1802

# YouTube Music auto-generated description

1803

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1808

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1809

'upload_date': '20190312',

1810

'uploader': 'Stephen - Topic',

1811

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1812

'artist': 'Stephen',

1813

'track': 'Voyeur Girl',

1814

'album': 'it\'s too much love to know my dear',

1815

'release_date': '20190313',

1816

'release_year': 2019,

1817

'alt_title': 'Voyeur Girl',

1818

'view_count': int,

1819

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1820

'playable_in_embed': True,

1821

'like_count': int,

1822

'categories': ['Music'],

1823

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1824

'channel': 'Stephen',

1825

'availability': 'public',

1826

'creator': 'Stephen',

1827

'duration': 169,

1828

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1829

'age_limit': 0,

1830

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1831

'tags': 'count:11',

1832

'live_status': 'not_live',

1833

'channel_follower_count': int

1834

},

1835

'params': {

1836

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1841

'only_matching': True,

1842

},

1843

{

1844

# invalid -> valid video id redirection

1845

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1850

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1851

'upload_date': '20090125',

1852

'uploader': 'Prochorowka',

1853

'uploader_id': 'Prochorowka',

1854

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1855

'artist': 'Panjabi MC',

1856

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1857

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1858

},

1859

'params': {

1860

'skip_download': True,

1861

},

1862

'skip': 'Video unavailable',

1863

},

1864

{

1865

# empty description results in an empty string

1866

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1873

'uploader_id': 'ElevageOrVert',

1874

'uploader': 'ElevageOrVert',

1875

'view_count': int,

1876

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1877

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1878

'like_count': int,

1879

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1880

'tags': [],

1881

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1882

'availability': 'public',

1883

'age_limit': 0,

1884

'categories': ['Pets & Animals'],

1885

'duration': 7,

1886

'playable_in_embed': True,

1887

'live_status': 'not_live',

1888

'channel': 'ElevageOrVert',

1889

'channel_follower_count': int

1890

},

1891

'params': {

1892

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1897

# see [2] for an example with '};' inside ytInitialPlayerResponse

1898

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1899

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1900

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1905

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1906

'upload_date': '20130831',

1907

'uploader_id': 'kudvenkat',

1908

'uploader': 'kudvenkat',

1909

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1910

'like_count': int,

1911

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1912

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1913

'live_status': 'not_live',

1914

'categories': ['Education'],

1915

'availability': 'public',

1916

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1917

'tags': 'count:12',

1918

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1923

'channel_follower_count': int

1924

},

1925

'params': {

1926

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1931

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1932

'only_matching': True,

1933

},

1934

{

1935

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1936

'only_matching': True,

1937

},

1938

{

1939

# https://github.com/ytdl-org/youtube-dl/pull/28094

1940

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1946

'upload_date': '20141120',

1947

'uploader': 'The Cinematic Orchestra - Topic',

1948

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1949

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1950

'artist': 'The Cinematic Orchestra',

1951

'track': 'Burn Out',

1952

'album': 'Every Day',

1953

'like_count': int,

1954

'live_status': 'not_live',

1955

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1960

'creator': 'The Cinematic Orchestra',

1961

'channel': 'The Cinematic Orchestra',

1962

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1963

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1964

'availability': 'public',

1965

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1966

'categories': ['Music'],

1967

'playable_in_embed': True,

1968

'channel_follower_count': int

1969

},

1970

'params': {

1971

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1976

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1977

'only_matching': True,

1978

},

1979

{

1980

# controversial video, requires bpctr/contentCheckOk

1981

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1986

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1987

'uploader': 'CBS Mornings',

1988

'uploader_id': 'CBSThisMorning',

1989

'upload_date': '20140716',

1990

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1991

'duration': 170,

1992

'categories': ['News & Politics'],

1993

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1994

'view_count': int,

1995

'channel': 'CBS Mornings',

1996

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

1997

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

1998

'age_limit': 18,

1999

'availability': 'needs_auth',

2000

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2001

'like_count': int,

2002

'live_status': 'not_live',

2003

'playable_in_embed': True,

2004

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2009

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2014

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2015

'upload_date': '20201120',

2016

'uploader': 'Walk around Japan',

2017

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2018

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2019

'duration': 1456,

2020

'categories': ['Travel & Events'],

2021

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2022

'view_count': int,

2023

'channel': 'Walk around Japan',

2024

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2025

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2026

'age_limit': 0,

2027

'availability': 'public',

2028

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2029

'live_status': 'not_live',

2030

'playable_in_embed': True,

2031

'channel_follower_count': int

2032

},

2033

'params': {

2034

'skip_download': True,

2035

},

2036

}, {

2037

# Has multiple audio streams

2038

'url': 'WaOKSUlf4TM',

2039

'only_matching': True

2040

}, {

2041

# Requires Premium: has format 141 when requested using YTM url

2042

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2043

'only_matching': True

2044

}, {

2045

# multiple subtitles with same lang_code

2046

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2047

'only_matching': True,

2048

}, {

2049

# Force use android client fallback

2050

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2051

'info_dict': {

2052

'id': 'YOelRv7fMxY',

2053

'title': 'DIGGING A SECRET TUNNEL Part 1',

2054

'ext': '3gp',

2055

'upload_date': '20210624',

2056

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2057

'uploader': 'colinfurze',

2058

'uploader_id': 'colinfurze',

2059

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2060

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2061

'duration': 596,

2062

'categories': ['Entertainment'],

2063

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2064

'view_count': int,

2065

'channel': 'colinfurze',

2066

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2067

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2068

'age_limit': 0,

2069

'availability': 'public',

2070

'like_count': int,

2071

'live_status': 'not_live',

2072

'playable_in_embed': True,

2073

'channel_follower_count': int

2074

},

2075

'params': {

2076

'format': '17', # 3gp format available on android

2077

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2082

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2083

'only_matching': True,

2084

'params': {

2085

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2090

'only_matching': True,

2091

}, {

2092

'note': 'Storyboards',

2093

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2099

'uploader_id': 'scishow',

2100

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2101

'upload_date': '20140324',

2102

'uploader': 'SciShow',

2103

'like_count': int,

2104

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2105

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2106

'view_count': int,

2107

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2108

'playable_in_embed': True,

2109

'tags': 'count:12',

2110

'uploader_url': 'http://www.youtube.com/user/scishow',

2111

'availability': 'public',

2112

'channel': 'SciShow',

2113

'live_status': 'not_live',

2114

'duration': 248,

2115

'categories': ['Education'],

2116

'age_limit': 0,

2117

'channel_follower_count': int

2118

}, 'params': {'format': 'mhtml', 'skip_download': True}

2119

}, {

2120

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2121

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2126

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2127

'uploader': 'Leon Nguyen',

2128

'uploader_id': 'VNSXIII',

2129

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2130

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2131

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2136

'tags': 'count:23',

2137

'playable_in_embed': True,

2138

'live_status': 'not_live',

2139

'upload_date': '20220103',

2140

'like_count': int,

2141

'availability': 'public',

2142

'channel': 'Leon Nguyen',

2143

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2144

'channel_follower_count': int

2145

}

2146

}, {

2147

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2148

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2153

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2154

'uploader': 'Quackity',

2155

'uploader_id': 'QuackityHQ',

2156

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2157

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2158

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2163

'tags': 'count:26',

2164

'playable_in_embed': True,

2165

'live_status': 'not_live',

2166

'release_timestamp': 1641172509,

2167

'release_date': '20220103',

2168

'upload_date': '20220103',

2169

'like_count': int,

2170

'availability': 'public',

2171

'channel': 'Quackity',

2172

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2173

'channel_follower_count': int

2174

}

2175

},

2176

{ # continuous livestream. Microformat upload date should be preferred.

2177

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2178

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2179

'info_dict': {

2180

'id': 'kgx4WGK0oNU',

2181

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2182

'ext': 'mp4',

2183

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2184

'availability': 'public',

2185

'age_limit': 0,

2186

'release_timestamp': 1637975704,

2187

'upload_date': '20210619',

2188

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2189

'live_status': 'is_live',

2190

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2191

'uploader': '阿鲍Abao',

2192

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2193

'channel': 'Abao in Tokyo',

2194

'channel_follower_count': int,

2195

'release_date': '20211127',

2196

'tags': 'count:39',

2197

'categories': ['People & Blogs'],

2198

'like_count': int,

2199

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2200

'view_count': int,

2201

'playable_in_embed': True,

2202

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2203

},

2204

'params': {'skip_download': True}

2205

}, {

2206

# Story. Requires specific player params to work.

2207

# Note: stories get removed after some period of time

2208

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2213

'view_count': int,

2214

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2215

'upload_date': '20220526',

2216

'categories': ['Education'],

2217

'title': 'Story',

2218

'channel': 'IT\'S HISTORY',

2219

'description': '',

2220

'uploader_id': 'BlastfromthePast',

2221

'duration': 12,

2222

'uploader': 'IT\'S HISTORY',

2223

'playable_in_embed': True,

2224

'age_limit': 0,

2225

'live_status': 'not_live',

2226

'tags': [],

2227

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2228

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2229

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2230

}

2231

}, {

2232

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2237

'upload_date': '20220323',

2238

'like_count': int,

2239

'availability': 'unlisted',

2240

'channel': 'nao20010128nao',

2241

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2242

'age_limit': 0,

2243

'uploader': 'nao20010128nao',

2244

'uploader_id': 'nao20010128nao',

2245

'categories': ['Music'],

2246

'view_count': int,

2247

'description': '',

2248

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2249

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2250

'live_status': 'not_live',

2251

'playable_in_embed': True,

2252

'channel_follower_count': int,

2253

'duration': 6,

2254

'tags': [],

2255

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

}

}

]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string has a non-empty first ``list`` value is
    rejected (returns False); any other URL is judged by the parent
    class's ``suitable``.
    """
    from ..utils import parse_qs

    list_id = parse_qs(url).get('list', [None])[0]
    return False if list_id else super().suitable(url)

2268

2269

def __init__(self, *args, **kwargs):
    """Run the parent initialiser, then create two empty per-instance caches.

    ``_code_cache`` is filled by ``_load_player`` (player id -> player code);
    ``_player_cache`` is filled by ``_decrypt_signature`` / ``_decrypt_nsig``.
    """
    super().__init__(*args, **kwargs)
    self._code_cache, self._player_cache = {}, {}

2273

2274

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Turn every ``is_from_start`` format into a generator-backed live format.

    The selected format dicts are modified in place (they are the same
    objects as in the caller's list): ``is_live`` and ``protocol`` are set
    and ``fragments`` becomes a partial of ``_live_dash_fragments`` bound
    to the format id, *live_start_time* and the ``mpd_feed`` closure.
    Nothing is returned.
    """
    refresh_lock = threading.Lock()

    # State shared between the two closures below
    is_live = True
    start_time = time.time()
    live_formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses once more than `delay` seconds
        # have passed since the previous (re)fetch
        nonlocal live_formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, is_live, _, live_formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        # Only the refresh itself is serialised; the lookup runs unlocked
        with refresh_lock:
            refetch_manifest(format_id, delay)

        for fmt in live_formats:
            if fmt['format_id'] == format_id:
                return fmt['manifest_url'], fmt['manifest_stream_number'], is_live

        if is_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in live_formats:
        fmt['is_live'] = True
        fmt['protocol'] = 'http_dash_segments_generator'
        fmt['fragments'] = functools.partial(
            self._live_dash_fragments, fmt['format_id'], live_start_time, mpd_feed)

2317

2318

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """Generate fragment descriptors for a live DASH format.

    Yields dicts with ``url`` (the segment URL built from the manifest's
    fragment base URL and ``sq/<index>``) and ``fragment_count`` (the
    newest known sequence number plus one).

    @param format_id        id handed to *mpd_feed* to pick the format
    @param live_start_time  stream start as a timestamp, or a falsy value
    @param mpd_feed         callable ``(format_id, delay)`` returning
                            ``(manifest_url, stream_number, is_live)`` or None
    @param ctx              dict; ``start`` and ``last_error`` are read
                            (``last_error`` is popped)

    The generator ends when the stream is reported as no longer live or
    when ``no_fragment_score`` exceeds 30.
    """
    # FETCH_SPAN: minimum seconds between polls; MAX_DURATION: 120 hours in seconds
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Must be bound before the closure below can run: its failure paths
    # return the enclosing `last_seq`, and the very first call may fail
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        # Returns (should_continue, last_seq)
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # Ask for a quick manifest refresh when forced or after an HTTP 403
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # The exception is only a way to jump to the `except` below
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Wait out the remainder of the poll interval
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2419

2420

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the player JS URL found in *ytcfgs*, joined onto
    https://www.youtube.com, or None when no URL is found.

    ``PLAYER_JS_URL`` is looked up first, then ``jsUrl`` under
    ``WEB_PLAYER_CONTEXT_CONFIGS``. *webpage* is not used here.
    """
    relative_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', relative_url) if relative_url else None

2427

2428

def _download_player_url(self, video_id, fatal=False):
    """Build the player ``base.js`` URL from the version named in the iframe API JS.

    Returns None when the download yields nothing or no player version
    is found in it.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2437

2438

def _signature_cache_id(self, example_sig):

2439

""" Return a string representation of a signature """

2440

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2441

2442

@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern
    found in *player_url*.

    Raises ExtractorError when none of the patterns is found.
    """
    for pattern in cls._PLAYER_INFO_RE:
        found = re.search(pattern, player_url)
        if found is not None:
            return found.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)

2451

2452

def _load_player(self, video_id, player_url, fatal=True):
    """Return the player code for *player_url*, downloading it at most once.

    The code is kept in ``self._code_cache`` under the player id. A falsy
    download result is not cached and None is returned instead.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if not code:
        return None
    self._code_cache[player_id] = code
    return code

2462

2463

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that maps an encrypted signature to the decrypted one.

    The function is looked up in the 'youtube-sigfuncs' cache first, where
    it is stored as a list of character indices. Otherwise it is parsed
    out of the player code, recorded in the cache and returned.

    Returns None when the player code could not be loaded.
    Raises ExtractorError when the derived cache key is not a plain
    file name.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    # The key must not contain path components. This is an explicit raise
    # rather than `assert` so the check also runs under `python -O`.
    if os.path.basename(func_id) != func_id:
        raise ExtractorError(f'Invalid signature function cache key {func_id!r}')

    cache_spec = self.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        return lambda s: ''.join(s[i] for i in cache_spec)

    code = self._load_player(video_id, player_url)
    if code:
        res = self._parse_sig_js(code)

        # Run the function on a string of distinct characters; the code
        # points of the output are the source index of each output char
        test_string = ''.join(map(chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res

2484

2485

def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    Does nothing unless the 'youtube_print_sig_code' param is set.
    *func* is run on a string of distinct characters as long as
    *example_sig*; the resulting index list is written out as a sum of
    ``s[...]`` index and slice expressions via ``self.to_screen``.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        # Yields 's[i]' items and 's[a:b:c]' slices; runs of indices that
        # change by +1 or -1 are collapsed into one slice.
        # NOTE(review): with fewer than two indices the loop body never
        # runs and `i` is unbound at the final yield.
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # `end` is inclusive here; a negative stop would be read as
            # "from the end", so it is left out instead
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return f's[{starts}{ends}{steps}]'

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the stride is unchanged
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at `prev`
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

2526

2527

def _parse_sig_js(self, jscode):
    """Find the signature function in the player JS and return it as a callable.

    The function name is searched for with the patterns below, passed as
    one tuple to ``_search_regex``. The function is then extracted with
    JSInterpreter; the returned callable takes the encrypted signature
    string and passes it to that function as its only argument.
    """
    # Literal parentheses in the JS are matched with `\(` / `\)`. A bare `$`
    # in their place would be an end-of-input anchor followed by more
    # pattern, which can never match.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])

2550

2551

def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    try:
        # One function is kept per (player URL, signature shape) pair
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        # Any failure is re-raised as ExtractorError carrying the traceback text
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2563

2564

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    # Results are cached per input value, n-functions per player URL
    cache = self._player_cache
    value_key = ('nsig_value', s)
    if value_key in cache:
        return cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in cache:
            cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypted = cache[func_key](s)
        cache[value_key] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        # Any failure is re-raised as ExtractorError carrying the traceback text
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2584

2585

def _extract_n_function_name(self, jscode):
    """Return the name of the n-parameter function found in the player JS.

    The first regex captures the identifier called on the n value and an
    optional array index. Without an index the identifier is the name.
    With an index, the identifier is an array declared as
    ``var <name> = [...];`` and the name is that array's element.
    """
    # `\(` / `\)` match the literal parentheses of the JS call; a bare `$`
    # in their place would be an end-of-input anchor and never match
    nfunc, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return nfunc
    return json.loads(js_to_json(self._search_regex(
        rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2594

2595

def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's n-function to a string.

    The function code is loaded from the 'youtube-nsig' cache when
    present; otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def decrypt(s):
        # The function is rebuilt from its code on every call and takes
        # its arguments as a list
        return jsi.extract_function_from_code(*func_code)([s])

    return decrypt

2612

2613

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The ``STS`` entry of *ytcfg* is used when it is a dict holding a truthy
    value; otherwise the value is searched for in the player code. Without
    a player URL this raises ExtractorError if *fatal*, else warns and
    returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))

2636

2637

def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs of the player responses.

    For each of the two tracking URLs the query string is extended with
    the parameters below and the URL is downloaded non-fatally. If a
    tracking URL is missing, a warning is reported and the method stops.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')
    # The enumerate index doubles as the "fully watched" flag (0, then 1)
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = video_length
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

2677

2678

@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in *webpage*.

    Returns a list holding, in this order: HTML-unescaped embed URLs,
    HTML-unescaped lazyYT ``data-youtube-id`` values, and the
    ``data-video_id`` values of "YouTube Video Importer" player divs.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = []
    for mobj in re.finditer(embed_re, webpage):
        entries.append(unescapeHTML(mobj.group('url')))

    # lazyYT YouTube embed
    for youtube_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        entries.append(unescapeHTML(youtube_id))

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for groups in re.findall(importer_re, webpage):
        # The last group is the id; the first two are the quote characters
        entries.append(groups[-1])

    return entries

@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``YoutubeIE._extract_urls(webpage)``, or None if it is empty."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)

2714

2715

@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched against *url*.

    Raises ExtractorError when the URL does not match.
    """
    found = re.compile(cls._VALID_URL, re.VERBOSE).match(url)
    if found is not None:
        return found.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2721

2722

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in *data*.

    The start time is ``timeRangeStartMillis`` scaled down by 1000 and the
    title is the renderer's ``simpleText`` title.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_of(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_of, chapter_title=title_of, duration=duration)

2736

2737

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists of the engagement panels.

    The first marker list that produces a non-empty chapter result wins;
    an empty list is returned when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2749

2750

def _extract_chapters_from_description(self, description, duration):
    """Build chapters from description lines that start with a timestamp.

    Each matching line contributes a (timestamp, title) pair; the pairs
    are passed on with ``strict=False``. A missing description is treated
    as empty.
    """
    timestamp_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')
    return self._extract_chapters(
        timestamp_lines,
        chapter_time=lambda pair: parse_duration(pair[0]),
        chapter_title=lambda pair: pair[1],
        duration=duration, strict=False)

2755

2756

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

2761

'title': chapter_title(chapter),

2762

} for chapter in chapter_list or []]

2763

if not strict:

2764

chapter_list.sort(key=lambda c: c['start_time'] or 0)

2765

2766

chapters = [{'start_time': 0, 'title': '<Untitled>'}]

2767

for idx, chapter in enumerate(chapter_list):

2768

if chapter['start_time'] is None or not chapter['title']:

2769

self.report_warning(f'Incomplete chapter {idx}')

2770

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

2771

chapters[-1]['end_time'] = chapter['start_time']

2772

chapters.append(chapter)

2773

else:

2774

self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')

2775

chapters[-1]['end_time'] = duration

2776

return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:]

2777

2778

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    @param comment_renderer  The `commentRenderer` dict
    @param parent            ID of the parent comment; 'root' is used if falsy
    @returns                 The comment dict, or None if the renderer has no
                             `commentId`
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    votes = parse_count(try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']),
        str)) or 0
    author_thumbnail = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }

2812

2813

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator yielding comment dicts for a comment section or reply thread.

    @param root_continuation_data  Renderer holding the initial continuation
    @param ytcfg                   ytcfg used to build the API headers/requests
    @param video_id                ID of the video the comments belong to
    @param parent                  ID of the parent comment when extracting
                                   replies; None for the top-level section
    @param tracker                 Dict of counters shared across the recursive
                                   calls; created on the top-level call
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Report the expected count and return the continuation for the requested sort order"""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments of one page followed by their replies.

        A bare `yield` (None) is emitted once the parent-comment limit is
        reached; the consumer treats it as the signal to stop.
        """
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Bounded by both the per-thread limit and the remaining global reply budget
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing components of the `max_comments` argument default to "unlimited"
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first response only carries the header; the comments are
                # fetched via the continuation of the selected sort order
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # Sentinel from extract_thread: parent comment limit reached
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

2958

2959

@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id

    @returns  The base64-encoded token as a str
    """
    token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
    return base64.b64encode(token.encode()).decode()

2966

2967

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns an iterator over the comments of the `comment-item-section`
    renderer in `contents`, truncated to the first component of the
    `max_comments` extractor argument (no limit when it is unset).
    """
    def _real_comment_extract(contents):
        renderer = next((
            item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
            if item.get('sectionIdentifier') == 'comment-item-section'), None)
        yield from self._comment_entries(renderer, ytcfg, video_id)

    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
    # A stop of None makes islice unbounded
    return itertools.islice(_real_comment_extract(contents), 0, max_comments)

2977

2978

@staticmethod
def _get_checkok_params():
    """Return the query flags that acknowledge the content and racy checks"""
    return {'contentCheckOk': True, 'racyCheckOk': True}

2981

2982

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player API query.

    @param sts  Signature timestamp; only included when it is not None
    @returns    Dict with `playbackContext` plus the check-ok params
    """
    context = {
        'html5Preference': 'HTML5_PREF_WANTS',
    }
    if sts is not None:
        context['signatureTimestamp'] = sts
    return {
        'playbackContext': {
            'contentPlaybackContext': context
        },
        **cls._get_checkok_params()
    }

@staticmethod
def _is_agegated(player_response):
    """Return True if the playability status of `player_response` indicates an age gate"""
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    # Both the status and the reason string are matched against the known markers
    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)

3007

3008

@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of `player_response` is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3011

3012

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the player API response of `video_id` for the given client.

    The signature timestamp is only looked up when `player_url` is given.
    @returns  The response from `_extract_response`, or None if it is falsy
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB'  # enable stories
    }
    yt_query.update(self._generate_player_context(sts))
    return self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
    ) or None

3031

3032

def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of innertube clients to query.

    Clients come from the `player_client` extractor argument; 'default' and
    'all' are expanded, unsupported names are skipped with a warning, and the
    defaults are used if nothing valid was requested. For music URLs the
    `<client>_music` variant of each requested client is added when it exists.
    """
    default = ['android', 'web']
    # Clients starting with "_" cannot be requested explicitly
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first so that the list is not extended while being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3055

3056

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect the player responses of all requested clients.

    Fallback clients may be queued while iterating (see `append_client`).
    @returns  Tuple (list of player responses, player_url)
    @raises   The last ExtractorError if no player response could be obtained
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that pop() below processes the clients in the requested order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe fallback is attempted at most once per extraction
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones are downgraded to warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3130

3131

def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Generator yielding format dicts for all given streaming data.

    Direct (https) formats from `formats`/`adaptiveFormats` are yielded
    first, followed by the formats of the HLS and DASH manifests unless
    those are skipped by configuration.

    @param streaming_data  List of `streamingData` dicts
    @param player_url      URL of the JS player, needed for signature/nsig decryption
    @param is_live         Whether the video is currently live
    @param duration        Video duration, used to detect possibly damaged formats
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No direct URL: it has to be rebuilt from the signature cipher
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher') or '')
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        if audio_track.get('audioIsDefault'):
            language_preference = 10
        elif 'descriptive' in (audio_track.get('displayName') or '').lower():
            language_preference = -10
        else:
            language_preference = -1
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality may be None if the format carries no quality information at all
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Set format_id and quality of a manifest format; return False if it duplicates a known format"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen via another protocol: disambiguate the ID
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Prefer the quality recorded for the itag, then for the height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Generator yielding one mhtml storyboard format per storyboard level.

    The spec string is "|"-separated: the first field is the base URL and
    each following field describes one level as 8 "#"-separated values.
    Nothing is yielded if the base URL or the duration is missing.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    # Without a duration the fragment durations below cannot be computed
    if not base_url or not duration:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # `spec` was reversed above, so the level number counts down from L
        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be shorter than the others
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3341

3342

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    @returns  Tuple (webpage, master_ytcfg, player_responses, player_url);
              webpage is None if 'webpage' is in the `player_skip` argument
    """
    webpage = None
    if 'webpage' not in self._configuration_arg('player_skip'):
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3355

3356

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live state and extract all formats.

    @returns  Tuple (live_broadcast_details, is_live, streaming_data, formats)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        # Fall back to the microformat if the video details do not say
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live, duration))

    return live_broadcast_details, is_live, streaming_data, formats

3366

3367

def _real_extract(self, url):

3368

url, smuggled_data = unsmuggle_url(url, {})

3369

video_id = self._match_id(url)

3370

3371

base_url = self.http_scheme() + '//www.youtube.com/'

3372

webpage_url = base_url + 'watch?v=' + video_id

3373

3374

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3375

3376

playability_statuses = traverse_obj(

3377

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3378

3379

trailer_video_id = get_first(

3380

playability_statuses,

3381

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3382

expected_type=str)

3383

if trailer_video_id:

3384

return self.url_result(

3385

trailer_video_id, self.ie_key(), trailer_video_id)

3386

3387

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3388

if webpage else (lambda x: None))

3389

3390

video_details = traverse_obj(

3391

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3392

microformats = traverse_obj(

3393

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3394

expected_type=dict, default=[])

3395

video_title = (

3396

get_first(video_details, 'title')

3397

or self._get_text(microformats, (..., 'title'))

3398

or search_meta(['og:title', 'twitter:title', 'title']))

3399

video_description = get_first(video_details, 'shortDescription')

3400

3401

multifeed_metadata_list = get_first(

3402

player_responses,

3403

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3404

expected_type=str)

3405

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3406

if self.get_param('noplaylist'):

3407

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3412

# Unquote should take place before split on comma (,) since textual

3413

# fields may contain comma as well (see

3414

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3415

feed_data = urllib.parse.parse_qs(

3416

urllib.parse.unquote_plus(feed))

3417

3418

def feed_entry(name):

3419

return try_get(

3420

feed_data, lambda x: x[name][0], str)

3421

3422

feed_id = feed_entry('id')

3423

if not feed_id:

3424

continue

3425

feed_title = feed_entry('title')

3426

title = video_title

3427

if feed_title:

3428

title += ' (%s)' % feed_title

3429

entries.append({

3430

'_type': 'url_transparent',

3431

'ie_key': 'Youtube',

3432

'url': smuggle_url(

3433

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3434

{'force_singlefeed': True}),

3435

'title': title,

3436

})

3437

feed_ids.append(feed_id)

3438

self.to_screen(

3439

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3440

% (', '.join(feed_ids), video_id))

3441

return self.playlist_result(

3442

entries, video_id, video_title, video_description)

3443

3444

duration = int_or_none(

3445

get_first(video_details, 'lengthSeconds')

3446

or get_first(microformats, 'lengthSeconds')

3447

or parse_duration(search_meta('duration'))) or None

3448

3449

if get_first(video_details, 'isPostLiveDvr'):

3450

self.write_debug('Video is in Post-Live Manifestless mode')

3451

if duration or 0 > 4 * 3600:

3452

self.report_warning(

3453

'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '

3454

'This is a known issue and patches are welcome')

3455

3456

live_broadcast_details, is_live, streaming_data, formats = self._list_formats(

3457

video_id, microformats, video_details, player_responses, player_url, duration)

3458

3459

if not formats:

3460

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3461

self.report_drm(video_id)

3462

pemr = get_first(

3463

playability_statuses,

3464

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3465

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3466

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3467

if subreason:

3468

if subreason == 'The uploader has not made this video available in your country.':

3469

countries = get_first(microformats, 'availableCountries')

3470

if not countries:

3471

regions_allowed = search_meta('regionsAllowed')

3472

countries = regions_allowed.split(',') if regions_allowed else None

3473

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3474

reason += f'. {subreason}'

3475

if reason:

3476

self.raise_no_formats(reason, expected=True)

3477

3478

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3479

if not keywords and webpage:

3480

keywords = [

3481

unescapeHTML(m.group('content'))

3482

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3483

for keyword in keywords:

3484

if keyword.startswith('yt:stretch='):

3485

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3486

if mobj:

3487

# NB: float is intentional for forcing float division

3488

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3493

f['stretched_ratio'] = ratio

3494

break

3495

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3496

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3497

if thumbnail_url:

3498

thumbnails.append({

3499

'url': thumbnail_url,

3500

})

3501

original_thumbnails = thumbnails.copy()

3502

3503

# The best resolution thumbnails sometimes does not appear in the webpage

3504

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3505

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3506

thumbnail_names = [

3507

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

3508

# in resolution, these are not the custom thumbnail. So de-prioritize them

3509

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3510

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3511

]

3512

n_thumbnail_names = len(thumbnail_names)

3513

thumbnails.extend({

3514

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3515

video_id=video_id, name=name, ext=ext,

3516

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3517

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3518

for thumb in thumbnails:

3519

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3520

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3521

self._remove_duplicate_formats(thumbnails)

3522

self._downloader._sort_thumbnails(original_thumbnails)

3523

3524

category = get_first(microformats, 'category') or search_meta('genre')

3525

channel_id = str_or_none(

3526

get_first(video_details, 'channelId')

3527

or get_first(microformats, 'externalChannelId')

3528

or search_meta('channelId'))

3529

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3530

3531

live_content = get_first(video_details, 'isLiveContent')

3532

is_upcoming = get_first(video_details, 'isUpcoming')

3533

if is_live is None:

3534

if is_upcoming or live_content is False:

3535

is_live = False

3536

if is_upcoming is None and (live_content or is_live):

3537

is_upcoming = False

3538

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3539

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3540

if not duration and live_end_time and live_start_time:

3541

duration = live_end_time - live_start_time

3542

3543

if is_live and self.get_param('live_from_start'):

3544

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3545

3546

formats.extend(self._extract_storyboard(player_responses, duration))

3547

3548

# Source is given priority since formats that throttle are given lower source_preference

3549

# When throttling issue is fully fixed, remove this

3550

self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3555

'formats': formats,

3556

'thumbnails': thumbnails,

3557

# The best thumbnail that we are sure exists. Prevents unnecessary

3558

# URL checking if user don't care about getting the best possible thumbnail

3559

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3560

'description': video_description,

3561

'uploader': get_first(video_details, 'author'),

3562

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3563

'uploader_url': owner_profile_url,

3564

'channel_id': channel_id,

3565

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

3566

'duration': duration,

3567

'view_count': int_or_none(

3568

get_first((video_details, microformats), (..., 'viewCount'))

3569

or search_meta('interactionCount')),

3570

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3571

'age_limit': 18 if (

3572

get_first(microformats, 'isFamilySafe') is False

3573

or search_meta('isFamilyFriendly') == 'false'

3574

or search_meta('og:restrictions:age') == '18+') else 0,

3575

'webpage_url': webpage_url,

3576

'categories': [category] if category else None,

3577

'tags': keywords,

3578

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3579

'is_live': is_live,

3580

'was_live': (False if is_live or is_upcoming or live_content is False

3581

else None if is_live is None or is_upcoming is None

3582

else live_content),

3583

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3584

'release_timestamp': live_start_time,

3585

}

3586

3587

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3588

if pctr:

3589

def get_lang_code(track):

3590

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3591

or track.get('languageCode'))

3592

3593

# Converted into dicts to remove duplicates

3594

captions = {

3595

get_lang_code(sub): sub

3596

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3597

translation_languages = {

3598

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3599

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3600

3601

def process_language(container, base_url, lang_code, sub_name, query):

3602

lang_subs = container.setdefault(lang_code, [])

3603

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

subtitles, automatic_captions = {}, {}

3614

for lang_code, caption_track in captions.items():

3615

base_url = caption_track.get('baseUrl')

3616

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3617

if not base_url:

3618

continue

3619

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3620

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3625

if not caption_track.get('isTranslatable'):

3626

continue

3627

for trans_code, trans_name in translation_languages.items():

3628

if not trans_code:

3629

continue

3630

orig_trans_code = trans_code

3631

if caption_track.get('kind') != 'asr':

3632

if 'translated_subs' in self._configuration_arg('skip'):

3633

continue

3634

trans_code += f'-{lang_code}'

3635

trans_name += format_field(lang_name, None, ' from %s')

3636

# Add an "-orig" label to the original language so that it can be distinguished.

3637

# The subs are returned without "-orig" as well for compatibility

3638

if lang_code == f'a-{orig_trans_code}':

3639

process_language(

3640

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3641

# Setting tlang=lang returns damaged subtitles.

3642

process_language(automatic_captions, base_url, trans_code, trans_name,

3643

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3644

info['automatic_captions'] = automatic_captions

3645

info['subtitles'] = subtitles

3646

3647

parsed_url = urllib.parse.urlparse(url)

3648

for component in [parsed_url.fragment, parsed_url.query]:

3649

query = urllib.parse.parse_qs(component)

3650

for k, v in query.items():

3651

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3652

d_k += '_time'

3653

if d_k not in info and k in s_ks:

3654

info[d_k] = parse_duration(query[k][0])

3655

3656

# Youtube Music Auto-generated description

3657

if video_description:

3658

mobj = re.search(

3659

r'''(?xs)

3660

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

3661

(?P<album>[^\n]+)

3662

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

3663

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

3664

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

3665

.+\nAuto-generated\ by\ YouTube\.\s*$

3666

''', video_description)

3667

if mobj:

3668

release_year = mobj.group('release_year')

3669

release_date = mobj.group('release_date')

3670

if release_date:

3671

release_date = release_date.replace('-', '')

3672

if not release_year:

3673

release_year = release_date[:4]

3674

info.update({

3675

'album': mobj.group('album'.strip()),

3676

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3677

'track': mobj.group('track').strip(),

3678

'release_date': release_date,

3679

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

3685

if not initial_data:

3686

query = {'videoId': video_id}

3687

query.update(self._get_checkok_params())

3688

initial_data = self._extract_response(

3689

item_id=video_id, ep='next', fatal=False,

3690

ytcfg=master_ytcfg, query=query,

3691

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3692

note='Downloading initial data API JSON')

3693

3694

info['comment_count'] = traverse_obj(initial_data, (

3695

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

3696

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

3697

), (

3698

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

3699

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

3700

), expected_type=int_or_none, get_all=False)

3701

3702

try: # This will error if there is no livechat

3703

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3704

except (KeyError, IndexError, TypeError):

3705

pass

3706

else:

3707

info.setdefault('subtitles', {})['live_chat'] = [{

3708

# url is needed to set cookies

3709

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

3710

'video_id': video_id,

3711

'ext': 'json',

3712

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3718

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3719

or self._extract_chapters_from_description(video_description, duration)

3720

or None)

3721

3722

contents = traverse_obj(

3723

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3724

expected_type=list, default=[])

3725

3726

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3727

if vpir:

3728

stl = vpir.get('superTitleLink')

3729

if stl:

3730

stl = self._get_text(stl)

3731

if try_get(

3732

vpir,

3733

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3734

info['location'] = stl

3735

else:

3736

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

3737

if mobj:

3738

info.update({

3739

'series': mobj.group(1),

3740

'season_number': int(mobj.group(2)),

3741

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3746

list) or []):

3747

tbr = tlb.get('toggleButtonRenderer') or {}

3748

for getter, regex in [(

3749

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3750

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3751

lambda x: x['accessibility'],

3752

lambda x: x['accessibilityData']['accessibilityData'],

3753

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3754

label = (try_get(tbr, getter, dict) or {}).get('label')

3755

if label:

3756

mobj = re.match(regex, label)

3757

if mobj:

3758

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3759

break

3760

sbr_tooltip = try_get(

3761

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3762

if sbr_tooltip:

3763

like_count, dislike_count = sbr_tooltip.split(' / ')

3764

info.update({

3765

'like_count': str_to_int(like_count),

3766

'dislike_count': str_to_int(dislike_count),

3767

})

3768

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3769

if vsir:

3770

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3771

info.update({

3772

'channel': self._get_text(vor, 'title'),

3773

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3778

list) or []

3779

multiple_songs = False

3780

for row in rows:

3781

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3782

multiple_songs = True

3783

break

3784

for row in rows:

3785

mrr = row.get('metadataRowRenderer') or {}

3786

mrr_title = mrr.get('title')

3787

if not mrr_title:

3788

continue

3789

mrr_title = self._get_text(mrr, 'title')

3790

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3791

if mrr_title == 'License':

3792

info['license'] = mrr_contents_text

3793

elif not multiple_songs:

3794

if mrr_title == 'Album':

3795

info['album'] = mrr_contents_text

3796

elif mrr_title == 'Artist':

3797

info['artist'] = mrr_contents_text

3798

elif mrr_title == 'Song':

3799

info['track'] = mrr_contents_text

3800

3801

fallbacks = {

3802

'channel': 'uploader',

3803

'channel_id': 'uploader_id',

3804

'channel_url': 'uploader_url',

3805

}

3806

3807

# The upload date for scheduled, live and past live streams / premieres in microformats

3808

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3809

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3810

upload_date = (

3811

unified_strdate(get_first(microformats, 'uploadDate'))

3812

or unified_strdate(search_meta('uploadDate')))

3813

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3814

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date

3815

info['upload_date'] = upload_date

3816

3817

for to, frm in fallbacks.items():

3818

if not info.get(to):

3819

info[to] = info.get(frm)

3820

3821

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3827

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3828

is_membersonly = None

3829

is_premium = None

3830

if initial_data and is_private is not None:

3831

is_membersonly = False

3832

is_premium = False

3833

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3834

badge_labels = set()

3835

for content in contents:

3836

if not isinstance(content, dict):

3837

continue

3838

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3839

for badge_label in badge_labels:

3840

if badge_label.lower() == 'members only':

3841

is_membersonly = True

3842

elif badge_label.lower() == 'premium':

3843

is_premium = True

3844

elif badge_label.lower() == 'unlisted':

3845

is_unlisted = True

3846

3847

info['availability'] = self._availability(

3848

is_private=is_private,

3849

needs_premium=is_premium,

3850

needs_subscription=is_membersonly,

3851

needs_auth=info['age_limit'] >= 18,

3852

is_unlisted=None if is_private is None else is_unlisted)

3853

3854

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3855

3856

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3862

3863

@staticmethod
def passthrough_smuggled_data(func):
    """Decorate ``func(self, url, smuggled_data)`` so it is callable as ``wrapper(self, url)``.

    The wrapper unsmuggles the incoming URL, sets ``is_music_url`` in the
    smuggled data when ``self.is_music_url(url)`` is truthy, calls *func*,
    and - when there is smuggled data and the returned info dict has
    entries - smuggles that data into the URL of every entry.
    """

    def _with_smuggled_urls(playlist_entries, payload):
        # Written as a generator so lazily produced entries stay lazy.
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in playlist_entries:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        plain_url, payload = unsmuggle_url(url, {})
        if self.is_music_url(plain_url):
            payload['is_music_url'] = True
        result = func(self, plain_url, payload)
        if payload and result.get('entries'):
            result['entries'] = _with_smuggled_urls(result['entries'], payload)
        return result

    return wrapper

def _extract_channel_id(self, webpage):

3884

channel_id = self._html_search_meta(

3885

'channelId', webpage, 'channel id', default=None)

3886

if channel_id:

3887

return channel_id

3888

channel_url = self._html_search_meta(

3889

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

3890

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

3891

'twitter:app:url:googleplay'), webpage, 'channel url')

3892

return self._search_regex(

3893

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

3894

channel_url, 'channel id')

3895

3896

@staticmethod

3897

def _extract_basic_item_renderer(item):

3898

# Modified from _extract_grid_item_renderer

3899

known_basic_renderers = (

3900

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

3901

)

3902

for key, renderer in item.items():

3903

if not isinstance(renderer, dict):

3904

continue

3905

elif key in known_basic_renderers:

3906

return renderer

3907

elif key.startswith('grid') and key.endswith('Renderer'):

3908

return renderer

3909

3910

def _grid_entries(self, grid_renderer):

3911

for item in grid_renderer['items']:

3912

if not isinstance(item, dict):

3913

continue

3914

renderer = self._extract_basic_item_renderer(item)

3915

if not isinstance(renderer, dict):

3916

continue

3917

title = self._get_text(renderer, 'title')

3918

3919

# playlist

3920

playlist_id = renderer.get('playlistId')

3921

if playlist_id:

3922

yield self.url_result(

3923

'https://www.youtube.com/playlist?list=%s' % playlist_id,

3924

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

3929

if video_id:

3930

yield self._extract_video(renderer)

3931

continue

3932

# channel

3933

channel_id = renderer.get('channelId')

3934

if channel_id:

3935

yield self.url_result(

3936

'https://www.youtube.com/channel/%s' % channel_id,

3937

ie=YoutubeTabIE.ie_key(), video_title=title)

3938

continue

3939

# generic endpoint URL support

3940

ep_url = urljoin('https://www.youtube.com/', try_get(

3941

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

3942

str))

3943

if ep_url:

3944

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

3945

if ie.suitable(ep_url):

3946

yield self.url_result(

3947

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

3948

break

3949

3950

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for a music list item renderer.

    Checked in order: the item's own video ID, a watch endpoint carrying a
    playlist ID (optionally together with a video ID), and a browse
    endpoint. Returns None when none of them is present.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={item_video_id}',
                               ie=YoutubeIE.ie_key(), video_id=item_video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        watch_video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if watch_video_id:
            target = f'https://music.youtube.com/watch?v={watch_video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)

3967

3968

def _shelf_entries_from_content(self, shelf_renderer):

3969

content = shelf_renderer.get('content')

3970

if not isinstance(content, dict):

3971

return

3972

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3973

if renderer:

3974

# TODO: add support for nested playlists so each shelf is processed

3975

# as separate playlist

3976

# TODO: this includes only first N items

3977

yield from self._grid_entries(renderer)

3978

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's own page (if any), then its inline entries.

    When *skip_channels* is true and the shelf URL contains ``/channels?``,
    nothing at all is yielded for this shelf.
    """
    relative_url = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    absolute_url = urljoin('https://www.youtube.com', relative_url)
    if absolute_url:
        # Links to other channels are skipped on request. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        points_to_channels = '/channels?' in absolute_url
        if skip_channels and points_to_channels:
            return
        yield self.url_result(
            absolute_url, video_title=self._get_text(shelf_renderer, 'title'))
    # The shelf may not carry a URL of its own, so also extract from its content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry

3998

3999

def _playlist_entries(self, video_list_renderer):

4000

for content in video_list_renderer['contents']:

4001

if not isinstance(content, dict):

4002

continue

4003

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

4004

if not isinstance(renderer, dict):

4005

continue

4006

video_id = renderer.get('videoId')

4007

if not video_id:

4008

continue

4009

yield self._extract_video(renderer)

4010

4011

def _rich_entries(self, rich_grid_renderer):
    """Yield the extracted video of a rich item, if it wraps a video renderer with an ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4018

4019

def _video_entry(self, video_renderer):

4020

video_id = video_renderer.get('videoId')

4021

if video_id:

4022

return self._extract_video(video_renderer)

4023

4024

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab URL result for a hashtag tile, or None if the tile has no usable URL."""
    relative_url = traverse_obj(
        hashtag_tile_renderer,
        ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    tile_url = urljoin('https://youtube.com', relative_url)
    if not tile_url:
        return None
    return self.url_result(
        tile_url, ie=YoutubeTabIE.ie_key(),
        title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4030

4031

def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video (if any), the attached playlist (if any),
    and every video linked inline in the post text, except a link that
    points at the attached video itself.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not (link and YoutubeIE.suitable(link)):
            continue
        linked_id = YoutubeIE._match_id(link)
        # Do not repeat the video that was already yielded as the attachment
        if linked_id != attached_video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)

4066

4067

def _post_thread_continuation_entries(self, post_thread_continuation):

4068

contents = post_thread_continuation.get('contents')

4069

if not isinstance(contents, list):

4070

return

4071

for content in contents:

4072

renderer = content.get('backstagePostThreadRenderer')

4073

if isinstance(renderer, dict):

4074

yield from self._post_thread_entries(renderer)

4075

continue

4076

renderer = content.get('videoRenderer')

4077

if isinstance(renderer, dict):

4078

yield self._video_entry(renderer)

4079

4080

r''' # unused

4081

def _rich_grid_entries(self, contents):

4082

for content in contents:

4083

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4084

if video_renderer:

4085

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in ``parent_renderer['contents']``.

    As a side channel, ``continuation_list`` is reset to ``[None]`` and its
    single slot is filled with the continuation extracted from the most
    specific renderer that provides one (item renderer, then its enclosing
    section renderer, then *parent_renderer*). Because this is a generator,
    the slot is only reliable once iteration has finished.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        # Section-like container for this content item
        # (presumably the first of these keys that holds a dict - confirm against traverse_obj)
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # No section container: the only other shape handled here is a rich item
            renderer = content.get('richItemRenderer')
            if renderer:
                for entry in self._rich_entries(renderer):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue
        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Maps a renderer key to a callable returning an iterable of entries
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'reelShelfRenderer': self._grid_entries,
                'shelfRenderer': self._shelf_entries,
                'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
            }
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # Single-entry helpers may return None; drop those
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of each item is processed
                break

        # Fall back to the section renderer's own continuation
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # Last resort: the continuation of the parent renderer itself
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4138

4139

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of *tab*, following continuations page by page.

    The entries embedded in the tab content are yielded first. Afterwards,
    while a continuation is available, the next page is requested and its
    entries are yielded. Iteration stops when there is no continuation, the
    response is empty, or the response contains no renderer known to this
    method.
    """
    # One-slot list written by _extract_entries (via the lambda below) to report
    # the continuation it found. The lambda looks the name up at call time, so
    # the rebinding inside the loop below is visible to it.
    continuation_list = [None]
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    # Only valid now that the generator above has been fully consumed
    continuation = continuation_list[0]

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # Response shape 1: entries live under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh slot so a stale token from the previous page is never reused
            continuation_list = [None]
            yield from known_continuation_renderers[key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Response shape 2: entries live in an 'appendContinuationItemsAction'.
        # Each value is (handler, key under which the handler expects the item list).
        known_renderers = {
            'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The first item alone decides which handler processes the whole list
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the raw item list in the dict shape the handler expects
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            yield from known_renderers[key][0](video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Neither response shape matched: nothing more can be extracted
        break

@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the renderer of the tab whose ``selected`` flag is exactly True.

    When no tab is selected, raise ExtractorError if *fatal* is true,
    otherwise return None.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    if not fatal:
        return None
    raise ExtractorError('Unable to find selected tab')

4223

4224

def _extract_uploader(self, data):
    """Return the uploader fields found in the playlist sidebar of *data*.

    The result may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; fields whose value is None are left out. An empty dict
    is returned when the sidebar names no owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner_run = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner_run:
        return {}

    display_text = owner_run.get('text')
    candidates = {
        # Collaborative playlists read "by X and N others"; keep only X
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', display_text, 'uploader', default=display_text),
        'uploader_id': try_get(
            owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {field: value for field, value in candidates.items() if value is not None}

4239

4240

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for a tabbed page (channel tab, playlist, hashtag, ...).

    Metadata is read from the channel or playlist metadata renderer of
    *data* plus the primary sidebar renderer; the entries come from the
    selected tab via ``_entries``. *item_id* is used as the playlist ID
    when the metadata provides no channel ID.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    # Prefer channel metadata; fall back to playlist metadata (which sets no channel_* values)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # None for playlist metadata; replaced by item_id further below
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    # NOTE(review): renderer may still be None here; presumably _extract_thumbnails tolerates that - confirm
    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1
            })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    # Banners get a lower preference value than the avatar entry (1) above
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5
            })

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    # The sidebar 'stats' are read by position below:
    # index 0 -> playlist_count, 1 -> view_count, 2 -> last-updated time text
    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Append the selected tab's 'title' and 'expandedText' fields to the title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    # Without channel metadata, take the uploader from the playlist sidebar instead
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # channel_* fields mirror the final uploader_* values
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id, ytcfg,
            self._extract_account_syncid(ytcfg, data),
            self._extract_visitor_data(data, ytcfg)),
        **metadata)

4331

4332

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline (watch page) playlist, page by page.

    Each page is the playlist panel of a ``next`` endpoint response. Videos
    up to and including the last one already yielded are skipped, and
    iteration ends when a page has no videos, adds nothing new, or the
    response carries no playlist.

    @param playlist     playlist renderer dict of the first page
    @param playlist_id  ID sent as ``playlistId`` in follow-up requests
    @param data         initial page data, used for account/visitor headers
    @param ytcfg        config passed to the API helpers
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Index just past the last video we already yielded (0 if it is not on this page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The endpoint is optional (every field below has a fallback), so
        # default to an empty dict instead of calling .get() on None
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        # Without a playlist in the response there is nothing left to page
        # through; the next iteration would otherwise subscript None
        if not playlist:
            return

4362

4363

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist renderer found in a watch page.

    When the renderer links to a playlist page that differs from the requested
    URL and the playlist is not a known unviewable one, the work is delegated to
    that URL via a url_result. Otherwise the inline playlist is extracted directly.
    @param item_id: fallback playlist ID when the renderer has no 'playlistId'
    @param url: URL that was requested
    @param data: initial response
    @param playlist: playlist renderer
    @param ytcfg: ytcfg passed on to the inline playlist extraction
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    linked_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, linked_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    if not playlist_url or playlist_url == url or is_known_unviewable:
        inline_entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(
            inline_entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4385

4386

def _extract_availability(self, data):
    """
    Work out the availability of a playlist/tab from its primary sidebar renderer.
    Note: the result is only public when YouTube states so explicitly
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_entries:
        if try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            # Only the first selected entry is considered
            selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
            if selected_label:
                labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)

4417

4418

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` entry of the playlist sidebar items.

    @param data: response
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the looked-up value must have
    @returns: the renderer, or None when no sidebar item provides one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    # The generator is lazy, so lookups stop at the first hit
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Re-request the playlist so that unavailable videos are included.

    The browse ID and params are taken from the sidebar's
    'show unavailable videos' menu item when one exists; otherwise fixed
    fallback values are used. Returns None when there is no primary sidebar renderer.
    @param item_id: playlist ID; also used to build the fallback browse ID
    @param data: response whose sidebar is inspected
    @param ytcfg: ytcfg used to build the API headers
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_renderer = entry.get('menuNavigationItemRenderer')
        caption = try_get(
            nav_renderer, lambda x: x['text']['simpleText'], str)
        if caption and caption.lower() == 'show unavailable videos':
            endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': endpoint.get('params') or 'wgYCCAA=',
        'browseId': endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4462

4463

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps

4466

4467

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and parse its initial data, retrying on failure.

    A retry happens on network errors other than HTTP 403/429 and when the
    initial data lacks all of the expected top-level keys. The number of
    retries comes from the 'extractor_retries' param (default 3).
    @param url: page URL to download
    @param item_id: ID passed to the download/parse helpers
    @param fatal: raise on failure instead of only reporting a warning
    @returns: (webpage, data) as obtained by the last attempt
    """
    max_retries = self.get_param('extractor_retries', 3)
    webpage = data = last_error = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            webpage = self._download_webpage(
                url, item_id, note='Downloading webpage%s' % (retry_suffix,))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            # Network errors are retried, except HTTP 403/429 responses
            if isinstance(cause, network_exceptions) and not (
                    isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429)):
                last_error = error_to_compat_str(cause or err.msg)
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            break

        # Only reached when the download and parse above succeeded:
        # every path through the except clause leaves this iteration
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt >= max_retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage"""
    # Nothing to report when ytcfg is present or the session is unauthenticated
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if 'authcheck' not in skipped_steps and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

4524

4525

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the initial data and ytcfg for a tab/playlist URL.

    The webpage is tried first unless skip_webpage is set. When it yields no
    data, the data is fetched through the API instead.
    @param url: URL to extract
    @param item_id: ID passed to the download helpers
    @param ytcfg: ytcfg to use; extracted from the webpage when falsy
    @param fatal: raise on a home-page redirect or a failed API fallback
    @param webpage_fatal: whether a failed webpage extraction should raise
    @param default_client: client passed to the API fallback
    @returns: (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4544

4545

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve the URL through the API and fetch the response of the endpoint it maps to.

    A resolved 'browseEndpoint' is requested from 'browse' and a
    'watchEndpoint' from 'next'; the browse endpoint is checked first.
    @param url: URL to resolve
    @param item_id: ID passed to the API helpers
    @param ytcfg: ytcfg used for the headers and requests
    @param fatal: raise when the URL cannot be resolved instead of warning
    @param default_client: client used for the headers and requests
    @returns: the endpoint's response; None when nothing resolved and not fatal
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_query = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_query:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_query, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

4562

4563

# 'params' value used by _search_results when the caller does not pass one;
# a falsy value means no 'params' key is added to the search request.
# NOTE(review): presumably overridden by subclasses - confirm
_SEARCH_PARAMS = None

4564

4565

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, following continuations until none is left.

    @param query: search query
    @param params: value for the request's 'params' key; self._SEARCH_PARAMS
                   is used when omitted, and a falsy value leaves the key out
    @param default_client: client used for the ytcfg, headers and requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    payload = {'query': query}
    if params:
        payload['params'] = params

    # Candidate locations of the result list inside a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the candidate paths
    check_get_keys = tuple({path[0] for path in content_keys})
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-element list passed to _extract_entries and read back after each page
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        payload.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=payload,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        page_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            break

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4600

IE_DESC = 'YouTube Tabs'

4601

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4610

(?P<not_channel>

4611

feed/|hashtag/|

4612

(?:playlist|watch)\?.*?\blist=

4613

)|

4614

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4619

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4620

}

4621

IE_NAME = 'youtube:tab'

4622

4623

_TESTS = [{

4624

'note': 'playlists, multipage',

4625

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4626

'playlist_mincount': 94,

4627

'info_dict': {

4628

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4629

'title': 'Igor Kleiner - Playlists',

4630

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4631

'uploader': 'Igor Kleiner',

4632

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4633

'channel': 'Igor Kleiner',

4634

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4635

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4636

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4637

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4638

'channel_follower_count': int

4639

},

4640

}, {

4641

'note': 'playlists, multipage, different order',

4642

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4643

'playlist_mincount': 94,

4644

'info_dict': {

4645

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4646

'title': 'Igor Kleiner - Playlists',

4647

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4648

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4649

'uploader': 'Igor Kleiner',

4650

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4651

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4652

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4653

'channel': 'Igor Kleiner',

4654

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4655

'channel_follower_count': int

4656

},

4657

}, {

4658

'note': 'playlists, series',

4659

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4660

'playlist_mincount': 5,

4661

'info_dict': {

4662

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4663

'title': '3Blue1Brown - Playlists',

4664

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4665

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4666

'uploader': '3Blue1Brown',

4667

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4668

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4669

'channel': '3Blue1Brown',

4670

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4671

'tags': ['Mathematics'],

4672

'channel_follower_count': int

4673

},

4674

}, {

4675

'note': 'playlists, singlepage',

4676

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4677

'playlist_mincount': 4,

4678

'info_dict': {

4679

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4680

'title': 'ThirstForScience - Playlists',

4681

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4682

'uploader': 'ThirstForScience',

4683

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4684

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4685

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4686

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4687

'tags': 'count:13',

4688

'channel': 'ThirstForScience',

4689

'channel_follower_count': int

4690

}

4691

}, {

4692

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4693

'only_matching': True,

4694

}, {

4695

'note': 'basic, single video playlist',

4696

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4697

'info_dict': {

4698

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4699

'uploader': 'Sergey M.',

4700

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4701

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4706

'channel': 'Sergey M.',

4707

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4708

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4709

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4714

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4715

'info_dict': {

4716

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4717

'uploader': 'Sergey M.',

4718

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4719

'title': 'youtube-dl empty playlist',

4720

'tags': [],

4721

'channel': 'Sergey M.',

4722

'description': '',

4723

'modified_date': '20160902',

4724

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4725

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4726

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4732

'info_dict': {

4733

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4734

'title': 'lex will - Home',

4735

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4736

'uploader': 'lex will',

4737

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4738

'channel': 'lex will',

4739

'tags': ['bible', 'history', 'prophesy'],

4740

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4741

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4742

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4743

'channel_follower_count': int

4744

},

4745

'playlist_mincount': 2,

4746

}, {

4747

'note': 'Videos tab',

4748

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4749

'info_dict': {

4750

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4751

'title': 'lex will - Videos',

4752

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4753

'uploader': 'lex will',

4754

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4755

'tags': ['bible', 'history', 'prophesy'],

4756

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4757

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4758

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4759

'channel': 'lex will',

4760

'channel_follower_count': int

4761

},

4762

'playlist_mincount': 975,

4763

}, {

4764

'note': 'Videos tab, sorted by popular',

4765

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4766

'info_dict': {

4767

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4768

'title': 'lex will - Videos',

4769

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4770

'uploader': 'lex will',

4771

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4772

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4773

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4774

'channel': 'lex will',

4775

'tags': ['bible', 'history', 'prophesy'],

4776

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4777

'channel_follower_count': int

4778

},

4779

'playlist_mincount': 199,

4780

}, {

4781

'note': 'Playlists tab',

4782

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4783

'info_dict': {

4784

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4785

'title': 'lex will - Playlists',

4786

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4787

'uploader': 'lex will',

4788

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4789

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4790

'channel': 'lex will',

4791

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4792

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4793

'tags': ['bible', 'history', 'prophesy'],

4794

'channel_follower_count': int

4795

},

4796

'playlist_mincount': 17,

4797

}, {

4798

'note': 'Community tab',

4799

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4800

'info_dict': {

4801

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4802

'title': 'lex will - Community',

4803

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4804

'uploader': 'lex will',

4805

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4806

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4807

'channel': 'lex will',

4808

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4809

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4810

'tags': ['bible', 'history', 'prophesy'],

4811

'channel_follower_count': int

4812

},

4813

'playlist_mincount': 18,

4814

}, {

4815

'note': 'Channels tab',

4816

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4817

'info_dict': {

4818

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4819

'title': 'lex will - Channels',

4820

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4821

'uploader': 'lex will',

4822

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4823

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4824

'channel': 'lex will',

4825

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4826

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4827

'tags': ['bible', 'history', 'prophesy'],

4828

'channel_follower_count': int

4829

},

4830

'playlist_mincount': 12,

4831

}, {

4832

'note': 'Search tab',

4833

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4834

'playlist_mincount': 40,

4835

'info_dict': {

4836

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4837

'title': '3Blue1Brown - Search - linear algebra',

4838

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4839

'uploader': '3Blue1Brown',

4840

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4841

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4842

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4843

'tags': ['Mathematics'],

4844

'channel': '3Blue1Brown',

4845

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4846

'channel_follower_count': int

4847

},

4848

}, {

4849

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4850

'only_matching': True,

4851

}, {

4852

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4853

'only_matching': True,

4854

}, {

4855

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4856

'only_matching': True,

4857

}, {

4858

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4859

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4860

'info_dict': {

4861

'title': '29C3: Not my department',

4862

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4863

'uploader': 'Christiaan008',

4864

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4865

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4866

'tags': [],

4867

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4868

'view_count': int,

4869

'modified_date': '20150605',

4870

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4871

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4872

'channel': 'Christiaan008',

4873

},

4874

'playlist_count': 96,

4875

}, {

4876

'note': 'Large playlist',

4877

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4878

'info_dict': {

4879

'title': 'Uploads from Cauchemar',

4880

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4881

'uploader': 'Cauchemar',

4882

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4883

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4884

'tags': [],

4885

'modified_date': r're:\d{8}',

4886

'channel': 'Cauchemar',

4887

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4888

'view_count': int,

4889

'description': '',

4890

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4891

},

4892

'playlist_mincount': 1123,

4893

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4894

}, {

4895

'note': 'even larger playlist, 8832 videos',

4896

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4897

'only_matching': True,

4898

}, {

4899

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4900

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4901

'info_dict': {

4902

'title': 'Uploads from Interstellar Movie',

4903

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4904

'uploader': 'Interstellar Movie',

4905

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4906

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4907

'tags': [],

4908

'view_count': int,

4909

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4910

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4911

'channel': 'Interstellar Movie',

4912

'description': '',

4913

'modified_date': r're:\d{8}',

4914

},

4915

'playlist_mincount': 21,

4916

}, {

4917

'note': 'Playlist with "show unavailable videos" button',

4918

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4919

'info_dict': {

4920

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4921

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4922

'uploader': 'Phim Siêu Nhân Nhật Bản',

4923

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4924

'view_count': int,

4925

'channel': 'Phim Siêu Nhân Nhật Bản',

4926

'tags': [],

4927

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4928

'description': '',

4929

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4930

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4931

'modified_date': r're:\d{8}',

4932

},

4933

'playlist_mincount': 200,

4934

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4935

}, {

4936

'note': 'Playlist with unavailable videos in page 7',

4937

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4938

'info_dict': {

4939

'title': 'Uploads from BlankTV',

4940

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4941

'uploader': 'BlankTV',

4942

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4943

'channel': 'BlankTV',

4944

'channel_url': 'https://www.youtube.com/c/blanktv',

4945

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4946

'view_count': int,

4947

'tags': [],

4948

'uploader_url': 'https://www.youtube.com/c/blanktv',

4949

'modified_date': r're:\d{8}',

4950

'description': '',

4951

},

4952

'playlist_mincount': 1000,

4953

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4954

}, {

4955

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4956

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4957

'info_dict': {

4958

'title': 'Data Analysis with Dr Mike Pound',

4959

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4960

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4961

'uploader': 'Computerphile',

4962

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4963

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4964

'tags': [],

4965

'view_count': int,

4966

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4967

'channel_url': 'https://www.youtube.com/user/Computerphile',

4968

'channel': 'Computerphile',

4969

},

4970

'playlist_mincount': 11,

4971

}, {

4972

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4973

'only_matching': True,

4974

}, {

4975

'note': 'Playlist URL that does not actually serve a playlist',

4976

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4981

'uploader': 'STREEM',

4982

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4983

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4984

'upload_date': '20150526',

4985

'license': 'Standard YouTube License',

4986

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4987

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4994

},

4995

'skip': 'This video is not available.',

4996

'add_ie': [YoutubeIE.ie_key()],

4997

}, {

4998

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4999

'only_matching': True,

5000

}, {

5001

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5002

'only_matching': True,

5003

}, {

5004

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5005

'info_dict': {

5006

'id': 'GgL890LIznQ', # This will keep changing

5007

'ext': 'mp4',

5008

'title': str,

5009

'uploader': 'Sky News',

5010

'uploader_id': 'skynews',

5011

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5012

'upload_date': r're:\d{8}',

5013

'description': str,

5014

'categories': ['News & Politics'],

5015

'tags': list,

5016

'like_count': int,

5017

'release_timestamp': 1642502819,

5018

'channel': 'Sky News',

5019

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5020

'age_limit': 0,

5021

'view_count': int,

5022

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

5023

'playable_in_embed': True,

5024

'release_date': '20220118',

5025

'availability': 'public',

5026

'live_status': 'is_live',

5027

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5028

'channel_follower_count': int

5029

},

5030

'params': {

5031

'skip_download': True,

5032

},

5033

'expected_warnings': ['Ignoring subtitle tracks found in '],

5034

}, {

5035

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5040

'uploader': 'The Young Turks',

5041

'uploader_id': 'TheYoungTurks',

5042

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5043

'upload_date': '20150715',

5044

'license': 'Standard YouTube License',

5045

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5046

'categories': ['News & Politics'],

5047

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5052

},

5053

'only_matching': True,

5054

}, {

5055

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5056

'only_matching': True,

5057

}, {

5058

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5059

'only_matching': True,

5060

}, {

5061

'note': 'A channel that is not live. Should raise error',

5062

'url': 'https://www.youtube.com/user/numberphile/live',

5063

'only_matching': True,

5064

}, {

5065

'url': 'https://www.youtube.com/feed/trending',

5066

'only_matching': True,

5067

}, {

5068

'url': 'https://www.youtube.com/feed/library',

5069

'only_matching': True,

5070

}, {

5071

'url': 'https://www.youtube.com/feed/history',

5072

'only_matching': True,

5073

}, {

5074

'url': 'https://www.youtube.com/feed/subscriptions',

5075

'only_matching': True,

5076

}, {

5077

'url': 'https://www.youtube.com/feed/watch_later',

5078

'only_matching': True,

5079

}, {

5080

'note': 'Recommended - redirects to home page.',

5081

'url': 'https://www.youtube.com/feed/recommended',

5082

'only_matching': True,

5083

}, {

5084

'note': 'inline playlist with not always working continuations',

5085

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5086

'only_matching': True,

5087

}, {

5088

'url': 'https://www.youtube.com/course',

5089

'only_matching': True,

5090

}, {

5091

'url': 'https://www.youtube.com/zsecurity',

5092

'only_matching': True,

5093

}, {

5094

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5095

'only_matching': True,

5096

}, {

5097

'url': 'https://www.youtube.com/TheYoungTurks/live',

5098

'only_matching': True,

5099

}, {

5100

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5107

}, {

5108

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5109

'only_matching': True,

5110

}, {

5111

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5112

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5113

'only_matching': True

5114

}, {

5115

'note': '/browse/ should redirect to /channel/',

5116

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5117

'only_matching': True

5118

}, {

5119

'note': 'VLPL, should redirect to playlist?list=PL...',

5120

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5121

'info_dict': {

5122

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5123

'uploader': 'NoCopyrightSounds',

5124

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5125

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5126

'title': 'NCS Releases',

5127

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5128

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5129

'modified_date': r're:\d{8}',

5130

'view_count': int,

5131

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5132

'tags': [],

5133

'channel': 'NoCopyrightSounds',

5134

},

5135

'playlist_mincount': 166,

5136

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5137

}, {

5138

'note': 'Topic, should redirect to playlist?list=UU...',

5139

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5140

'info_dict': {

5141

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5142

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5143

'title': 'Uploads from Royalty Free Music - Topic',

5144

'uploader': 'Royalty Free Music - Topic',

5145

'tags': [],

5146

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5147

'channel': 'Royalty Free Music - Topic',

5148

'view_count': int,

5149

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5150

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5151

'modified_date': r're:\d{8}',

5152

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5153

'description': '',

5154

},

5155

'expected_warnings': [

5156

'The URL does not have a videos tab',

5157

r'[Uu]navailable videos (are|will be) hidden',

5158

],

5159

'playlist_mincount': 101,

5160

}, {

5161

'note': 'Topic without a UU playlist',

5162

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5163

'info_dict': {

5164

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5165

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5166

'tags': [],

5167

},

5168

'expected_warnings': [

5169

'the playlist redirect gave error',

5170

],

5171

'playlist_mincount': 9,

5172

}, {

5173

'note': 'Youtube music Album',

5174

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5175

'info_dict': {

5176

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5177

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5182

'modified_date': r're:\d{8}',

5183

},

5184

'playlist_count': 50,

5185

}, {

5186

'note': 'unlisted single video playlist',

5187

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5188

'info_dict': {

5189

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5190

'uploader': 'colethedj',

5191

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5192

'title': 'yt-dlp unlisted playlist test',

5193

'availability': 'unlisted',

5194

'tags': [],

5195

'modified_date': '20211208',

5196

'channel': 'colethedj',

5197

'view_count': int,

5198

'description': '',

5199

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5200

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5201

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5206

'url': 'https://www.youtube.com/feed/recommended',

5207

'info_dict': {

5208

'id': 'recommended',

5209

'title': 'recommended',

5210

'tags': [],

5211

},

5212

'playlist_mincount': 50,

5213

'params': {

5214

'skip_download': True,

5215

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5216

},

5217

}, {

5218

'note': 'API Fallback: /videos tab, sorted by oldest first',

5219

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5220

'info_dict': {

5221

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5222

'title': 'Cody\'sLab - Videos',

5223

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5224

'uploader': 'Cody\'sLab',

5225

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5226

'channel': 'Cody\'sLab',

5227

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5228

'tags': [],

5229

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5230

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5231

'channel_follower_count': int

5232

},

5233

'playlist_mincount': 650,

5234

'params': {

5235

'skip_download': True,

5236

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5237

},

5238

}, {

5239

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5240

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5241

'info_dict': {

5242

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5243

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5244

'title': 'Uploads from Royalty Free Music - Topic',

5245

'uploader': 'Royalty Free Music - Topic',

5246

'modified_date': r're:\d{8}',

5247

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5248

'description': '',

5249

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5250

'tags': [],

5251

'channel': 'Royalty Free Music - Topic',

5252

'view_count': int,

5253

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5254

},

5255

'expected_warnings': [

5256

'does not have a videos tab',

5257

r'[Uu]navailable videos (are|will be) hidden',

5258

],

5259

'playlist_mincount': 101,

5260

'params': {

5261

'skip_download': True,

5262

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5263

},

5264

}, {

5265

'note': 'non-standard redirect to regional channel',

5266

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5267

'only_matching': True

5268

}, {

5269

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5270

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5271

'info_dict': {

5272

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5273

'modified_date': '20220407',

5274

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5275

'tags': [],

5276

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5277

'uploader': 'pukkandan',

5278

'availability': 'unlisted',

5279

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5280

'channel': 'pukkandan',

5281

'description': 'Test for collaborative playlist',

5282

'title': 'yt-dlp test - collaborative playlist',

5283

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5284

},

5285

'playlist_mincount': 2

}]

@classmethod
def suitable(cls, url):
    """Return False for any URL YoutubeIE accepts; otherwise use the inherited check."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5291

5292

# Splits a URL into named parts: `pre` (the portion matched by _VALID_URL), an
# optional `tab` path segment (`/\w+`) and `post` (everything that remains).
# The conditional group `(?(not_channel)|...)` only tries to capture `tab` when
# the `not_channel` group did not take part in the match (that group is
# presumably defined inside _VALID_URL, which is not visible here).
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5293

5294

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab/playlist/channel URL and hand it to the matching extraction path.

    The URL is first normalised to the www.youtube.com host and split with
    `_URL_RE`. Music URLs (flagged through `smuggled_data`), bare channel
    pages and `watch` URLs without a video ID are rewritten before the page
    data is fetched. Depending on what the fetched data contains, the result
    comes from `_extract_from_tabs`, `_extract_from_playlist`, or a
    `url_result` pointing at a single video or at a redirect target.

    Raises ExtractorError when the page cannot be recognised, when a music
    album cannot be resolved, when the `live` tab was requested but a tab page
    was returned, or when an explicitly requested tab does not exist.
    """
    item_id = self._match_id(url)
    parsed_url = urllib.parse.urlparse(url)
    url = urllib.parse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    follow_channel_redirects = 'no-youtube-channel-redirect' not in compat_opts

    def split_url(target):
        # Groups that did not participate come back as None; turn them into ''
        # so the str methods used below are always safe
        groups = self._URL_RE.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    mobj = split_url(url)
    redirect_warning = None
    pre = mobj['pre']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':  # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab, post, is_channel = '', '', False
        elif id_prefix == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user asked for before a default tab is substituted
    original_tab_name = tab
    if is_channel and not tab and follow_channel_redirects:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = split_url(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data,
        ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        get_all=False)
    if redirect_url and follow_channel_redirects:
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        if follow_channel_redirects:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab was requested explicitly, so there is nothing to fall back to
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')

5423

5424

5425

class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist IDs and `list=` URLs and forward them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs claimed by YoutubeTabIE and URLs that carry a `v` parameter."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is imported here at function scope even though the
        # module imports it as well - presumably deliberate; confirm before removing
        from ..utils import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        return False if video_ids[0] else super().suitable(url)

    def _real_extract(self, url):
        """Rebuild the input as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be decided on the URL as given, before it is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query when there is one; a bare ID has none
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5534

5535

5536

class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be short links that carry a `list=` parameter into watch URLs."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (video + playlist) and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5583

5584

5585

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map `/embed/live_stream?channel=...` URLs onto the channel's `/live` page."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel ID taken from the query string."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5598

5599

5600

class YoutubeYtUserIE(InfoExtractor):
    """Expand the `ytuser:<name>` shorthand into the user's `/videos` page."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the videos tab of the named user."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5614

5615

5616

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Keyword extractor (`:ytfav` and variants) that redirects to the `LL` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no further information."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

5633

5634

5635

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Keyword extractor (`:ytnotif` and variants) listing the notification menu entries."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in `response`.

        `continuation_list` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last
        `continuationItemRenderer` seen among the items, if any.
        """
        notification_list = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for item in notification_list:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            continuation = item.get('continuationItemRenderer')
            if continuation:
                continuation_list[0] = continuation

    def _extract_notification_renderer(self, notification):
        """Convert one `notificationRenderer` into a `url` result dict.

        Returns None when `notification` is empty (items that only carry a
        continuation have no renderer) or when it points neither at a video
        nor at a community post with a resolvable channel ID and post ID.
        """
        if not notification:
            # Nothing to inspect; skip the lookups instead of feeding them None
            return

        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
            channel_id = None
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not channel_id or not post_id:
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)
        # The sent time is only used when the `approximate_date` extractor argument is set
        upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')
                       if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())
                       else None)
        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries page by page until a page yields no continuation."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            # `response` still holds the previous page here (None on the first request)
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return a playlist named `notifications` whose entries are produced lazily."""
        display_id = 'notifications'
        ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)

5724

5725

5726

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the `ytsearch` key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the `ytsearchdate` key."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle `/results` and `/search` URLs, forwarding any `sp` parameter to the search."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            'entries': [{
                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                'title': '#cats',
            }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results; the query doubles as playlist id and title."""
        qs = parse_qs(url)
        # `search_query` wins over `q` when both are present and non-empty
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)

5794

5795

5796

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs, optionally narrowed to one section."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment) -> matching `sp` parameter value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of music search results.

        The section comes from the `sp` query parameter when it is given
        (translated back to a section name if it is a known value), and from
        the URL fragment otherwise. The playlist id/title is the query,
        followed by ` - <section>` when a section applies.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Prefer the readable section name; fall back to the raw parameter
            section = params
            for name, encoded in self._SECTIONS.items():
                if encoded == params:
                    section = name
                    break
        else:
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                # Unknown (or absent) fragment: search without a section
                section = None
        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)

5847

5848

5849

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed keyword extractors.

    A subclass sets _FEED_NAME; both the extractor name and the
    https://www.youtube.com/feed/<name> URL it redirects to are derived from it.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(self):
        return f'youtube:{self._FEED_NAME}'

    def _real_initialize(self):
        # This class does not inherit from YoutubeBaseInfoExtractor, so its check is called explicitly
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the feed page for _FEED_NAME."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5867

5868

5869

class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword extractor (`:ytwatchlater`) that redirects to the `WL` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no further information."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5881

5882

5883

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `recommended` feed; also matches the bare youtube.com URL."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base-class default of True
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `subscriptions` feed (`:ytsubs` and variants)."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    # Accepts both ":ythis" and ":ythistory"
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{'url': ':ythistory', 'only_matching': True}]

class YoutubeStoriesIE(InfoExtractor):
    """Turn a "ytstories:UC..." pseudo-URL into an "RLTD..." playlist URL."""
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    # The captured id is the part of the channel ID after the leading "UC"
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', 'only_matching': True}]

    def _real_extract(self, url):
        """Build the playlist ID from the matched id and delegate to YoutubeTabIE."""
        matched_id = self._match_id(url)
        playlist_id = f'RLTD{matched_id}'
        stories_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        return self.url_result(stories_url, ie=YoutubeTabIE, video_id=playlist_id)

5937

5938

5939

class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catch watch/attribution URLs that carry no video ID.

    Such URLs typically result from an unquoted "&" on the command line;
    extraction always fails with an explanatory (expected) error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Only matches when the URL ends right after one of the listed
    # non-identifying query parameters (or after "watch?" itself)
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The suggested command names this project's executable (yt-dlp),
        # not the upstream youtube-dl one. The space before the final period
        # keeps the period from being read as part of the video ID.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """
    Extractor for youtube.com/clip/<id> URLs.

    Looks up the base video ID and the clip's loop command in the page data,
    then defers to YoutubeIE (url_transparent) with the clip boundaries
    attached as section_start/section_end.
    """
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
        }
    }]

    def _real_extract(self, url):
        """
        Return a url_transparent result for the clip's base video.

        Raises ExtractorError when the video ID or the clip's start/end
        times cannot be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First "loopCommand" found under the clip attribution panel
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # clip_data is None when the path above does not resolve, and the
        # time fields may be absent or malformed; report this the same way as
        # a missing video ID rather than leaking TypeError/KeyError/ValueError.
        try:
            section_start = int(clip_data['startTimeMs']) / 1000
            section_end = int(clip_data['endTimeMs']) / 1000
        except (TypeError, KeyError, ValueError) as err:
            raise ExtractorError('Unable to find clip start/end times') from err

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': section_start,
            'section_end': section_end,
        }

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose "v" parameter has only 1 to 10 ID characters."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True}]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)